diff --git a/.github/actions/common_setup/action.yml b/.github/actions/common_setup/action.yml index b02413adc44..e492fa97816 100644 --- a/.github/actions/common_setup/action.yml +++ b/.github/actions/common_setup/action.yml @@ -18,9 +18,6 @@ runs: echo "Setup the common ENV variables" cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/${{inputs.job_type}} - REPO_COPY=${{runner.temp}}/${{inputs.job_type}}/git-repo-copy - IMAGES_PATH=${{runner.temp}}/images_path - REPORTS_PATH=${{runner.temp}}/reports_dir EOF if [ -z "${{env.GITHUB_JOB_OVERRIDDEN}}" ] && [ "true" == "${{inputs.nested_job}}" ]; then echo "The GITHUB_JOB_OVERRIDDEN ENV is unset, and must be set for the nested jobs" @@ -30,6 +27,4 @@ runs: shell: bash run: | # to remove every leftovers - sudo rm -fr "$TEMP_PATH" - mkdir -p "$REPO_COPY" - cp -a "$GITHUB_WORKSPACE"/. "$REPO_COPY"/ + sudo rm -fr "$TEMP_PATH" && mkdir -p "$TEMP_PATH" diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index f6af4778cf1..ef554a1b0ff 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -10,27 +10,21 @@ on: # yamllint disable-line rule:truthy branches: - 'backport/**' jobs: - CheckLabels: + RunConfig: runs-on: [self-hosted, style-checker] - # Run the first check always, even if the CI is cancelled - if: ${{ always() }} + outputs: + data: ${{ steps.runconfig.outputs.CI_DATA }} steps: - name: Check out repository code uses: ClickHouse/checkout@v1 with: - clear-repository: true + clear-repository: true # to ensure correct digests + fetch-depth: 0 # to get version + filter: tree:0 - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 run_check.py - PythonUnitTests: - runs-on: [self-hosted, style-checker] - needs: CheckLabels - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -40,273 +34,237 @@ jobs: echo "Testing $dir" python3 -m unittest discover -s "$dir" -p 'test_*.py' done - DockerHubPushAarch64: - runs-on: [self-hosted, style-checker-aarch64] - needs: CheckLabels - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - - name: Images check + - name: PrepareRunConfig + id: runconfig run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_images_check.py --suffix aarch64 - - name: Upload images files to artifacts - uses: actions/upload-artifact@v3 - with: - name: changed_images_aarch64 - path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json - DockerHubPushAmd64: - runs-on: [self-hosted, style-checker] - needs: CheckLabels - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - - name: Images check + echo "::group::configure CI run" + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --outfile ${{ runner.temp }}/ci_run_data.json + echo "::endgroup::" + + echo "::group::CI run configure results" + python3 -m json.tool ${{ runner.temp }}/ci_run_data.json + echo "::endgroup::" + + { + echo 'CI_DATA<> "$GITHUB_OUTPUT" + - name: Re-create GH statuses for skipped jobs if any run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_images_check.py --suffix amd64 - - name: Upload images files to artifacts - uses: actions/upload-artifact@v3 - with: - name: changed_images_amd64 - path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json - DockerHubPush: - needs: [DockerHubPushAmd64, 
DockerHubPushAarch64, PythonUnitTests] - runs-on: [self-hosted, style-checker] - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags - filter: tree:0 - - name: Download changed aarch64 images - uses: actions/download-artifact@v3 - with: - name: changed_images_aarch64 - path: ${{ runner.temp }} - - name: Download changed amd64 images - uses: actions/download-artifact@v3 - with: - name: changed_images_amd64 - path: ${{ runner.temp }} - - name: Images check - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - - name: Upload images files to artifacts - uses: actions/upload-artifact@v3 - with: - name: changed_images - path: ${{ runner.temp }}/changed_images.json + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses + BuildDockers: + needs: [RunConfig] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_docker.yml + with: + data: ${{ needs.RunConfig.outputs.data }} CompatibilityCheckX86: - needs: [BuilderDebRelease] + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Compatibility check X86 + test_name: Compatibility check (amd64) runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} run_command: | - cd "$REPO_COPY/tests/ci" python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions CompatibilityCheckAarch64: - needs: [BuilderDebAarch64] + needs: [RunConfig, BuilderDebAarch64] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Compatibility check X86 + test_name: Compatibility check (aarch64) runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} run_command: | - cd "$REPO_COPY/tests/ci" python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc ######################################################################################### #################################### ORDINARY BUILDS #################################### ######################################################################################### BuilderDebRelease: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_release checkout_depth: 0 + data: ${{ needs.RunConfig.outputs.data }} BuilderDebAarch64: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_aarch64 checkout_depth: 0 + data: ${{ needs.RunConfig.outputs.data }} BuilderDebAsan: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_asan + data: ${{ needs.RunConfig.outputs.data }} BuilderDebTsan: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_tsan + data: ${{ needs.RunConfig.outputs.data }} BuilderDebDebug: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: 
package_debug + data: ${{ needs.RunConfig.outputs.data }} BuilderBinDarwin: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: binary_darwin + data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 BuilderBinDarwinAarch64: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: binary_darwin_aarch64 + data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ DockerServerImages: - needs: - - BuilderDebRelease - - BuilderDebAarch64 - runs-on: [self-hosted, style-checker] - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - filter: tree:0 - - name: Check docker clickhouse/clickhouse-server building - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type head --no-push \ - --image-repo clickhouse/clickhouse-server --image-path docker/server - python3 docker_server.py --release-type head --no-push \ - --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" + needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Docker server and keeper images + runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} + checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself + run_command: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_server.py --release-type head --no-push \ + --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse + python3 docker_server.py --release-type head --no-push \ + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: - if: ${{ success() || failure() }} + # run report check for failed builds to indicate the CI error + if: ${{ !cancelled() }} needs: - - BuilderDebRelease + - RunConfig - BuilderDebAarch64 - BuilderDebAsan - - BuilderDebTsan - BuilderDebDebug + - BuilderDebRelease + - BuilderDebTsan uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse build check runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} additional_envs: | NEEDS_DATA<> "$GITHUB_OUTPUT" + - name: Re-create GH statuses for skipped jobs if any run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_images_check.py --suffix amd64 - - name: Upload images files to artifacts - uses: actions/upload-artifact@v3 - with: - name: changed_images_amd64 - path: ${{ runner.temp 
}}/docker_images_check/changed_images_amd64.json - DockerHubPush: - needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] - runs-on: [self-hosted, style-checker] - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags - filter: tree:0 - - name: Download changed aarch64 images - uses: actions/download-artifact@v3 - with: - name: changed_images_aarch64 - path: ${{ runner.temp }} - - name: Download changed amd64 images - uses: actions/download-artifact@v3 - with: - name: changed_images_amd64 - path: ${{ runner.temp }} - - name: Images check - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - - name: Upload images files to artifacts - uses: actions/upload-artifact@v3 - with: - name: changed_images - path: ${{ runner.temp }}/changed_images.json + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses + BuildDockers: + needs: [RunConfig] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_docker.yml + with: + data: ${{ needs.RunConfig.outputs.data }} + set_latest: true StyleCheck: - needs: DockerHubPush - if: ${{ success() || failure() }} + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: test_name: Style check runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} run_command: | - cd "$REPO_COPY/tests/ci" python3 style_check.py --no-push CompatibilityCheckX86: - needs: [BuilderDebRelease] + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Compatibility check X86 + test_name: Compatibility check (amd64) runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} run_command: | - cd "$REPO_COPY/tests/ci" python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions CompatibilityCheckAarch64: - needs: [BuilderDebAarch64] + needs: [RunConfig, BuilderDebAarch64] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Compatibility check X86 + test_name: Compatibility check (aarch64) runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} run_command: | - cd "$REPO_COPY/tests/ci" python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc ######################################################################################### #################################### ORDINARY BUILDS #################################### ######################################################################################### +# TODO: never skip builds! 
BuilderDebRelease: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: - checkout_depth: 0 build_name: package_release + checkout_depth: 0 + data: ${{ needs.RunConfig.outputs.data }} BuilderDebAarch64: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: - checkout_depth: 0 build_name: package_aarch64 + checkout_depth: 0 + data: ${{ needs.RunConfig.outputs.data }} BuilderBinRelease: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: - checkout_depth: 0 build_name: binary_release + checkout_depth: 0 # otherwise we will have no info about contributors + data: ${{ needs.RunConfig.outputs.data }} BuilderDebAsan: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_asan + data: ${{ needs.RunConfig.outputs.data }} BuilderDebUBsan: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_ubsan + data: ${{ needs.RunConfig.outputs.data }} BuilderDebTsan: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_tsan + data: ${{ needs.RunConfig.outputs.data }} BuilderDebMsan: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_msan + data: ${{ needs.RunConfig.outputs.data }} BuilderDebDebug: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_debug + data: ${{ needs.RunConfig.outputs.data }} ########################################################################################## ##################################### SPECIAL BUILDS ##################################### ########################################################################################## BuilderBinClangTidy: - needs: [DockerHubPush] + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: binary_tidy + data: ${{ needs.RunConfig.outputs.data }} BuilderBinDarwin: - needs: [DockerHubPush] + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: binary_darwin + data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 BuilderBinAarch64: - needs: [DockerHubPush] + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: binary_aarch64 + data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 BuilderBinFreeBSD: - needs: [DockerHubPush] + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: binary_freebsd + data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 BuilderBinDarwinAarch64: - needs: [DockerHubPush] + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} uses: 
./.github/workflows/reusable_build.yml with: build_name: binary_darwin_aarch64 + data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 BuilderBinPPC64: - needs: [DockerHubPush] + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: binary_ppc64le + data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 BuilderBinAmd64Compat: - needs: [DockerHubPush] + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: binary_amd64_compat + data: ${{ needs.RunConfig.outputs.data }} + checkout_depth: 0 + BuilderBinAmd64Musl: + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_build.yml + with: + build_name: binary_amd64_musl + data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 BuilderBinAarch64V80Compat: - needs: [DockerHubPush] + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: binary_aarch64_v80compat + data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 BuilderBinRISCV64: - needs: [DockerHubPush] + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: binary_riscv64 + data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 BuilderBinS390X: - needs: [DockerHubPush] + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: binary_s390x + data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ DockerServerImages: - needs: - - BuilderDebRelease - - BuilderDebAarch64 - runs-on: [self-hosted, style-checker] - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - filter: tree:0 - - name: Check docker clickhouse/clickhouse-server building - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type head \ - --image-repo clickhouse/clickhouse-server --image-path docker/server - python3 docker_server.py --release-type head \ - --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" + needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Docker server and keeper images + runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} + # FIXME: avoid using 0 checkout + checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself + run_command: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_server.py --release-type head \ + --image-repo clickhouse/clickhouse-server --image-path docker/server + python3 docker_server.py --release-type head \ + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper 
############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: - if: ${{ success() || failure() }} + # run report check for failed builds to indicate the CI error + if: ${{ !cancelled() }} needs: + - RunConfig - BuilderBinRelease - BuilderDebAarch64 - BuilderDebAsan @@ -269,16 +278,18 @@ jobs: with: test_name: ClickHouse build check runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} additional_envs: | NEEDS_DATA<> "$GITHUB_OUTPUT" + BuildDockers: + needs: [RunConfig] + uses: ./.github/workflows/reusable_docker.yml + with: + data: "${{ needs.RunConfig.outputs.data }}" + set_latest: true SonarCloud: runs-on: [self-hosted, builder] env: diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c92fa52ed4e..bd2b2b60904 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -13,37 +13,25 @@ on: # yamllint disable-line rule:truthy - opened branches: - master - paths-ignore: - - '**.md' - - 'docker/docs/**' - - 'docs/**' - - 'utils/check-style/aspell-ignore/**' - - 'tests/ci/docs_check.py' - - '.github/workflows/docs_check.yml' ########################################################################################## ##################################### SMALL CHECKS ####################################### ########################################################################################## jobs: - CheckLabels: + RunConfig: runs-on: [self-hosted, style-checker] - # Run the first check always, even if the CI is cancelled - if: ${{ always() }} + outputs: + data: ${{ steps.runconfig.outputs.CI_DATA }} steps: - name: Check out repository code uses: ClickHouse/checkout@v1 with: - clear-repository: true + clear-repository: true # to ensure correct digests + fetch-depth: 0 # to get version + filter: tree:0 - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 run_check.py - PythonUnitTests: - runs-on: [self-hosted, style-checker] - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -53,244 +41,260 @@ jobs: echo "Testing $dir" python3 -m unittest discover -s "$dir" -p 'test_*.py' done - DockerHubPushAarch64: - needs: CheckLabels - runs-on: [self-hosted, style-checker-aarch64] - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - - name: Images check + - name: PrepareRunConfig + id: runconfig run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_images_check.py --suffix aarch64 - - name: Upload images files to artifacts - uses: actions/upload-artifact@v3 - with: - name: changed_images_aarch64 - path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json - DockerHubPushAmd64: - needs: CheckLabels - runs-on: [self-hosted, style-checker] - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - - name: Images check + echo "::group::configure CI run" + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --outfile ${{ runner.temp }}/ci_run_data.json + echo "::endgroup::" + + echo "::group::CI run configure results" + python3 -m json.tool ${{ runner.temp }}/ci_run_data.json + echo "::endgroup::" + + { + echo 'CI_DATA<> 
"$GITHUB_OUTPUT" + - name: Re-create GH statuses for skipped jobs if any run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_images_check.py --suffix amd64 - - name: Upload images files to artifacts - uses: actions/upload-artifact@v3 - with: - name: changed_images_amd64 - path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json - DockerHubPush: - needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] - runs-on: [self-hosted, style-checker] - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags - filter: tree:0 - - name: Download changed aarch64 images - uses: actions/download-artifact@v3 - with: - name: changed_images_aarch64 - path: ${{ runner.temp }} - - name: Download changed amd64 images - uses: actions/download-artifact@v3 - with: - name: changed_images_amd64 - path: ${{ runner.temp }} - - name: Images check + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses + - name: Style check early + # hack to run style check before the docker build job if possible (style-check image not changed) + if: contains(fromJson(steps.runconfig.outputs.CI_DATA).jobs_data.jobs_to_do, 'Style check early') run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - - name: Upload images files to artifacts - uses: actions/upload-artifact@v3 - with: - name: changed_images - path: ${{ runner.temp }}/changed_images.json + DOCKER_TAG=$(echo '${{ toJson(fromJson(steps.runconfig.outputs.CI_DATA).docker_data.images) }}' | tr -d '\n') + export DOCKER_TAG=$DOCKER_TAG + python3 ./tests/ci/style_check.py --no-push + BuildDockers: + needs: [RunConfig] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_docker.yml + with: + data: ${{ needs.RunConfig.outputs.data }} StyleCheck: - needs: DockerHubPush - # We need additional `&& ! cancelled()` to have the job being able to cancel - if: ${{ success() || failure() || ( always() && ! 
cancelled() ) }} + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: test_name: Style check runner_type: style-checker run_command: | - cd "$REPO_COPY/tests/ci" python3 style_check.py + data: ${{ needs.RunConfig.outputs.data }} secrets: secret_envs: | ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_OUTPUT" + - name: Re-create GH statuses for skipped jobs if any + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses + BuildDockers: + needs: [RunConfig] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_docker.yml + with: + data: ${{ needs.RunConfig.outputs.data }} CompatibilityCheckX86: - needs: [BuilderDebRelease] + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Compatibility check X86 + test_name: Compatibility check (amd64) runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} run_command: | - cd "$REPO_COPY/tests/ci" python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions CompatibilityCheckAarch64: - needs: [BuilderDebAarch64] + needs: [RunConfig, BuilderDebAarch64] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Compatibility check X86 + test_name: Compatibility check (aarch64) runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} run_command: | - cd "$REPO_COPY/tests/ci" python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc ######################################################################################### #################################### ORDINARY BUILDS #################################### ######################################################################################### BuilderDebRelease: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_release checkout_depth: 0 + data: ${{ needs.RunConfig.outputs.data }} BuilderDebAarch64: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_aarch64 checkout_depth: 0 + data: ${{ needs.RunConfig.outputs.data }} BuilderDebAsan: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_asan + data: ${{ needs.RunConfig.outputs.data }} BuilderDebUBsan: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_ubsan + data: ${{ needs.RunConfig.outputs.data }} BuilderDebTsan: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_tsan + data: ${{ needs.RunConfig.outputs.data }} BuilderDebMsan: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: package_msan + data: ${{ needs.RunConfig.outputs.data }} BuilderDebDebug: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: 
./.github/workflows/reusable_build.yml with: build_name: package_debug + data: ${{ needs.RunConfig.outputs.data }} BuilderBinDarwin: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: binary_darwin checkout_depth: 0 + data: ${{ needs.RunConfig.outputs.data }} BuilderBinDarwinAarch64: - needs: [DockerHubPush] + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: build_name: binary_darwin_aarch64 checkout_depth: 0 + data: ${{ needs.RunConfig.outputs.data }} ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ DockerServerImages: - needs: - - BuilderDebRelease - - BuilderDebAarch64 - runs-on: [self-hosted, style-checker] - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - filter: tree:0 - - name: Check docker clickhouse/clickhouse-server building - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type head --no-push \ - --image-repo clickhouse/clickhouse-server --image-path docker/server - python3 docker_server.py --release-type head --no-push \ - --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" + needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Docker server and keeper images + runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} + checkout_depth: 0 + run_command: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_server.py --release-type head --no-push \ + --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse + python3 docker_server.py --release-type head --no-push \ + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: - if: ${{ success() || failure() }} + # run report check for failed builds to indicate the CI error + if: ${{ !cancelled() }} needs: + - RunConfig - BuilderDebRelease - BuilderDebAarch64 - BuilderDebAsan @@ -189,30 +187,38 @@ jobs: with: test_name: ClickHouse build check runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} additional_envs: | NEEDS_DATA<> "$GITHUB_ENV" << 'EOF' ${{inputs.additional_envs}} + DOCKER_TAG<> "$GITHUB_ENV" - name: Apply sparse checkout for contrib # in order to check that it doesn't break build @@ -60,20 +69,20 @@ jobs: uses: ./.github/actions/common_setup with: job_type: build_check - - name: Download changed images - uses: actions/download-artifact@v3 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} + - name: Pre + run: | + python3 
"$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --pre --job-name '${{inputs.build_name}}' - name: Build run: | - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - - name: Upload build URLs to artifacts - if: ${{ success() || failure() }} - uses: actions/upload-artifact@v3 - with: - name: ${{ env.BUILD_URLS }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + python3 "$GITHUB_WORKSPACE/tests/ci/build_check.py" "$BUILD_NAME" + - name: Post + # it still be build report to upload for failed build job + if: always() + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.build_name}}' + - name: Mark as done + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --mark-success --job-name '${{inputs.build_name}}' - name: Clean if: always() uses: ./.github/actions/clean diff --git a/.github/workflows/reusable_docker.yml b/.github/workflows/reusable_docker.yml new file mode 100644 index 00000000000..08a5740e7e0 --- /dev/null +++ b/.github/workflows/reusable_docker.yml @@ -0,0 +1,68 @@ +name: Build docker images +'on': + workflow_call: + inputs: + data: + description: json with ci data from todo job + required: true + type: string + set_latest: + description: set latest tag for resulting multiarch manifest + required: false + type: boolean + default: false +jobs: + DockerBuildAarch64: + runs-on: [self-hosted, style-checker-aarch64] + if: | + !failure() && !cancelled() && toJson(fromJson(inputs.data).docker_data.missing_aarch64) != '[]' + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + ref: ${{ fromJson(inputs.data).git_ref }} + - name: Build images + run: | + python3 "${GITHUB_WORKSPACE}/tests/ci/docker_images_check.py" \ + --suffix aarch64 \ + --image-tags '${{ toJson(fromJson(inputs.data).docker_data.images) }}' \ + --missing-images '${{ toJson(fromJson(inputs.data).docker_data.missing_aarch64) }}' + DockerBuildAmd64: + runs-on: [self-hosted, style-checker] + if: | + !failure() && !cancelled() && toJson(fromJson(inputs.data).docker_data.missing_amd64) != '[]' + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + ref: ${{ fromJson(inputs.data).git_ref }} + - name: Build images + run: | + python3 "${GITHUB_WORKSPACE}/tests/ci/docker_images_check.py" \ + --suffix amd64 \ + --image-tags '${{ toJson(fromJson(inputs.data).docker_data.images) }}' \ + --missing-images '${{ toJson(fromJson(inputs.data).docker_data.missing_amd64) }}' + DockerMultiArchManifest: + needs: [DockerBuildAmd64, DockerBuildAarch64] + runs-on: [self-hosted, style-checker] + if: | + !failure() && !cancelled() && toJson(fromJson(inputs.data).docker_data.missing_multi) != '[]' + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + ref: ${{ fromJson(inputs.data).git_ref }} + - name: Build images + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + if [ "${{ inputs.set_latest }}" == "true" ]; then + echo "latest tag will be set for resulting manifests" + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 \ + --image-tags '${{ toJson(fromJson(inputs.data).docker_data.images) }}' \ + --missing-images '${{ toJson(fromJson(inputs.data).docker_data.missing_multi) }}' \ + --set-latest + else + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 \ + --image-tags '${{ toJson(fromJson(inputs.data).docker_data.images) }}' \ + --missing-images '${{ 
toJson(fromJson(inputs.data).docker_data.missing_multi) }}' + fi diff --git a/.github/workflows/reusable_simple_job.yml b/.github/workflows/reusable_simple_job.yml new file mode 100644 index 00000000000..ea196a32664 --- /dev/null +++ b/.github/workflows/reusable_simple_job.yml @@ -0,0 +1,90 @@ +### For the pure soul wishes to move it to another place +# https://github.com/orgs/community/discussions/9050 + +name: Simple job +'on': + workflow_call: + inputs: + test_name: + description: the value of test type from tests/ci/ci_config.py, ends up as $CHECK_NAME ENV + required: true + type: string + runner_type: + description: the label of runner to use + required: true + type: string + run_command: + description: the command to launch the check + default: "" + required: false + type: string + checkout_depth: + description: the value of the git shallow checkout + required: false + type: number + default: 1 + submodules: + description: if the submodules should be checked out + required: false + type: boolean + default: false + additional_envs: + description: additional ENV variables to setup the job + type: string + working-directory: + description: sets custom working directory + type: string + default: "" + git_ref: + description: commit to use, merge commit for pr or head + required: false + type: string + default: ${{ github.event.after }} # no merge commit + secrets: + secret_envs: + description: if given, it's passed to the environments + required: false + + +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + CHECK_NAME: ${{inputs.test_name}} + +jobs: + Test: + runs-on: [self-hosted, '${{inputs.runner_type}}'] + name: ${{inputs.test_name}} + env: + GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}} + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + ref: ${{ inputs.git_ref }} + submodules: ${{inputs.submodules}} + fetch-depth: ${{inputs.checkout_depth}} + filter: tree:0 + - name: Set build envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + CHECK_NAME=${{ inputs.test_name }} + ${{inputs.additional_envs}} + ${{secrets.secret_envs}} + EOF + - name: Common setup + uses: ./.github/actions/common_setup + with: + job_type: test + - name: Run + run: | + if [ -n '${{ inputs.working-directory }}' ]; then + cd "${{ inputs.working-directory }}" + else + cd "$GITHUB_WORKSPACE/tests/ci" + fi + ${{ inputs.run_command }} + - name: Clean + if: always() + uses: ./.github/actions/clean diff --git a/.github/workflows/reusable_test.yml b/.github/workflows/reusable_test.yml index e82d2d51596..09177ad887a 100644 --- a/.github/workflows/reusable_test.yml +++ b/.github/workflows/reusable_test.yml @@ -14,13 +14,10 @@ name: Testing workflow required: true type: string run_command: - description: the command to launch the check. 
Usually starts with `cd '$REPO_COPY/tests/ci'` - required: true + description: the command to launch the check + default: "" + required: false type: string - batches: - description: how many batches for the test will be launched - default: 1 - type: number checkout_depth: description: the value of the git shallow checkout required: false @@ -34,80 +31,89 @@ name: Testing workflow additional_envs: description: additional ENV variables to setup the job type: string + data: + description: ci data + type: string + required: true + working-directory: + description: sets custom working directory + type: string + default: "" secrets: secret_envs: description: if given, it's passed to the environments required: false + env: # Force the stdout and stderr streams to be unbuffered PYTHONUNBUFFERED: 1 CHECK_NAME: ${{inputs.test_name}} jobs: - PrepareStrategy: - # batches < 1 is misconfiguration, - # and we need this step only for batches > 1 - if: ${{ inputs.batches > 1 }} - runs-on: [self-hosted, style-checker-aarch64] - outputs: - batches: ${{steps.batches.outputs.batches}} - steps: - - name: Calculate batches - id: batches - run: | - batches_output=$(python3 -c 'import json; print(json.dumps(list(range(${{inputs.batches}}))))') - echo "batches=${batches_output}" >> "$GITHUB_OUTPUT" Test: - # If PrepareStrategy is skipped for batches == 1, - # we still need to launch the test. - # `! failure()` is mandatory here to launch on skipped Job - # `&& !cancelled()` to allow the be cancelable - if: ${{ ( !failure() && !cancelled() ) && inputs.batches > 0 }} - # Do not add `-0` to the end, if there's only one batch - name: ${{inputs.test_name}}${{ inputs.batches > 1 && format('-{0}',matrix.batch) || '' }} - env: - GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}${{ inputs.batches > 1 && format('-{0}',matrix.batch) || '' }} runs-on: [self-hosted, '${{inputs.runner_type}}'] - needs: [PrepareStrategy] + if: ${{ !failure() && !cancelled() && contains(fromJson(inputs.data).jobs_data.jobs_to_do, inputs.test_name) }} + name: ${{inputs.test_name}}${{ fromJson(inputs.data).jobs_data.jobs_params[inputs.test_name].num_batches > 1 && format('-{0}',matrix.batch) || '' }} + env: + GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}${{ fromJson(inputs.data).jobs_data.jobs_params[inputs.test_name].num_batches > 1 && format('-{0}',matrix.batch) || '' }} strategy: fail-fast: false # we always wait for entire matrix matrix: - # if PrepareStrategy does not have batches, we use 0 - batch: ${{ needs.PrepareStrategy.outputs.batches - && fromJson(needs.PrepareStrategy.outputs.batches) - || fromJson('[0]')}} + batch: ${{ fromJson(inputs.data).jobs_data.jobs_params[inputs.test_name].batches }} steps: - name: Check out repository code uses: ClickHouse/checkout@v1 with: clear-repository: true + ref: ${{ fromJson(inputs.data).git_ref }} submodules: ${{inputs.submodules}} fetch-depth: ${{inputs.checkout_depth}} filter: tree:0 - name: Set build envs run: | cat >> "$GITHUB_ENV" << 'EOF' + CHECK_NAME=${{ inputs.test_name }} ${{inputs.additional_envs}} ${{secrets.secret_envs}} + DOCKER_TAG< 1}} + if: ${{ fromJson(inputs.data).jobs_data.jobs_params[inputs.test_name].num_batches > 1 }} run: | cat >> "$GITHUB_ENV" << 'EOF' RUN_BY_HASH_NUM=${{matrix.batch}} - RUN_BY_HASH_TOTAL=${{inputs.batches}} + RUN_BY_HASH_TOTAL=${{ fromJson(inputs.data).jobs_data.jobs_params[inputs.test_name].num_batches }} EOF - - name: Run test - run: ${{inputs.run_command}} + - name: Pre run + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} 
--pre --job-name '${{inputs.test_name}}' + - name: Run + run: | + if [ -n "${{ inputs.working-directory }}" ]; then + cd "${{ inputs.working-directory }}" + else + cd "$GITHUB_WORKSPACE/tests/ci" + fi + if [ -n "$(echo '${{ inputs.run_command }}' | tr -d '\n')" ]; then + echo "Running command from workflow input" + ${{ inputs.run_command }} + else + echo "Running command from job config" + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --run --job-name '${{inputs.test_name}}' + fi + - name: Post run + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.test_name}}' + - name: Mark as done + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --mark-success --job-name '${{inputs.test_name}}' --batch ${{matrix.batch}} - name: Clean if: always() uses: ./.github/actions/clean diff --git a/.gitmessage b/.gitmessage new file mode 100644 index 00000000000..f4a25a837bc --- /dev/null +++ b/.gitmessage @@ -0,0 +1,10 @@ + + +## To avoid merge commit in CI run (add a leading space to apply): +#no-merge-commit + +## Running specified job (add a leading space to apply): +#job_ +#job_stateless_tests_release +#job_package_debug +#job_integration_tests_asan diff --git a/CHANGELOG.md b/CHANGELOG.md index aa40012617c..283000f1804 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,6 @@ ### Table of Contents +**[ClickHouse release v23.12, 2023-12-28](#2312)**
+**[ClickHouse release v23.11, 2023-12-06](#2311)**
**[ClickHouse release v23.10, 2023-11-02](#2310)**
**[ClickHouse release v23.9, 2023-09-28](#239)**
**[ClickHouse release v23.8 LTS, 2023-08-31](#238)**
@@ -13,7 +15,357 @@ # 2023 Changelog -### ClickHouse release 23.10, 2023-11-02 +### ClickHouse release 23.12, 2023-12-28 + +#### Backward Incompatible Change +* Fix check for non-deterministic functions in TTL expressions. Previously, you could create a TTL expression with non-deterministic functions in some cases, which could lead to undefined behavior later. This fixes [#37250](https://github.com/ClickHouse/ClickHouse/issues/37250). Disallow TTL expressions that don't depend on any columns of a table by default. It can be allowed back by `SET allow_suspicious_ttl_expressions = 1` or `SET compatibility = '23.11'`. Closes [#37286](https://github.com/ClickHouse/ClickHouse/issues/37286). [#51858](https://github.com/ClickHouse/ClickHouse/pull/51858) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for the `OPTIMIZE` is not allowed by default (it can be unlocked with the `allow_experimental_replacing_merge_with_cleanup` setting). [#58267](https://github.com/ClickHouse/ClickHouse/pull/58267) ([Alexander Tokmakov](https://github.com/tavplubix)). This fixes [#57930](https://github.com/ClickHouse/ClickHouse/issues/57930). This closes [#54988](https://github.com/ClickHouse/ClickHouse/issues/54988). This closes [#54570](https://github.com/ClickHouse/ClickHouse/issues/54570). This closes [#50346](https://github.com/ClickHouse/ClickHouse/issues/50346). This closes [#47579](https://github.com/ClickHouse/ClickHouse/issues/47579). The feature has to be removed because it is not good. We have to remove it as quickly as possible, because there is no other option. [#57932](https://github.com/ClickHouse/ClickHouse/pull/57932) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Implement Refreshable Materialized Views, requested in [#33919](https://github.com/ClickHouse/ClickHouse/issues/57995). [#56946](https://github.com/ClickHouse/ClickHouse/pull/56946) ([Michael Kolupaev](https://github.com/al13n321), [Michael Guzov](https://github.com/koloshmet)). +* Introduce `PASTE JOIN`, which allows users to join tables without `ON` clause simply by row numbers. Example: `SELECT * FROM (SELECT number AS a FROM numbers(2)) AS t1 PASTE JOIN (SELECT number AS a FROM numbers(2) ORDER BY a DESC) AS t2`. [#57995](https://github.com/ClickHouse/ClickHouse/pull/57995) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* The `ORDER BY` clause now supports specifying `ALL`, meaning that ClickHouse sorts by all columns in the `SELECT` clause. Example: `SELECT col1, col2 FROM tab WHERE [...] ORDER BY ALL`. [#57875](https://github.com/ClickHouse/ClickHouse/pull/57875) ([zhongyuankai](https://github.com/zhongyuankai)). +* Added a new mutation command `ALTER TABLE APPLY DELETED MASK`, which allows to enforce applying of mask written by lightweight delete and to remove rows marked as deleted from disk. [#57433](https://github.com/ClickHouse/ClickHouse/pull/57433) ([Anton Popov](https://github.com/CurtizJ)). +* A handler `/binary` opens a visual viewer of symbols inside the ClickHouse binary. [#58211](https://github.com/ClickHouse/ClickHouse/pull/58211) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added a new SQL function `sqid` to generate Sqids (https://sqids.org/), example: `SELECT sqid(125, 126)`. [#57512](https://github.com/ClickHouse/ClickHouse/pull/57512) ([Robert Schulze](https://github.com/rschu1ze)). 
+* Add a new function `seriesPeriodDetectFFT` to detect series period using FFT. [#57574](https://github.com/ClickHouse/ClickHouse/pull/57574) ([Bhavna Jindal](https://github.com/bhavnajindal)). +* Add an HTTP endpoint for checking if Keeper is ready to accept traffic. [#55876](https://github.com/ClickHouse/ClickHouse/pull/55876) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Add 'union' mode for schema inference. In this mode the resulting table schema is the union of all files schemas (so schema is inferred from each file). The mode of schema inference is controlled by a setting `schema_inference_mode` with two possible values - `default` and `union`. Closes [#55428](https://github.com/ClickHouse/ClickHouse/issues/55428). [#55892](https://github.com/ClickHouse/ClickHouse/pull/55892) ([Kruglov Pavel](https://github.com/Avogar)). +* Add new setting `input_format_csv_try_infer_numbers_from_strings` that allows to infer numbers from strings in CSV format. Closes [#56455](https://github.com/ClickHouse/ClickHouse/issues/56455). [#56859](https://github.com/ClickHouse/ClickHouse/pull/56859) ([Kruglov Pavel](https://github.com/Avogar)). +* When the number of databases or tables exceeds a configurable threshold, show a warning to the user. [#57375](https://github.com/ClickHouse/ClickHouse/pull/57375) ([凌涛](https://github.com/lingtaolf)). +* Dictionary with `HASHED_ARRAY` (and `COMPLEX_KEY_HASHED_ARRAY`) layout supports `SHARDS` similarly to `HASHED`. [#57544](https://github.com/ClickHouse/ClickHouse/pull/57544) ([vdimir](https://github.com/vdimir)). +* Add asynchronous metrics for total primary key bytes and total allocated primary key bytes in memory. [#57551](https://github.com/ClickHouse/ClickHouse/pull/57551) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `SHA512_256` function. [#57645](https://github.com/ClickHouse/ClickHouse/pull/57645) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `FORMAT_BYTES` as an alias for `formatReadableSize`. [#57592](https://github.com/ClickHouse/ClickHouse/pull/57592) ([Bharat Nallan](https://github.com/bharatnc)). +* Allow passing optional session token to the `s3` table function. [#57850](https://github.com/ClickHouse/ClickHouse/pull/57850) ([Shani Elharrar](https://github.com/shanielh)). +* Introduce a new setting `http_make_head_request`. If it is turned off, the URL table engine will not do a HEAD request to determine the file size. This is needed to support inefficient, misconfigured, or not capable HTTP servers. [#54602](https://github.com/ClickHouse/ClickHouse/pull/54602) ([Fionera](https://github.com/fionera)). +* It is now possible to refer to ALIAS column in index (non-primary-key) definitions (issue [#55650](https://github.com/ClickHouse/ClickHouse/issues/55650)). Example: `CREATE TABLE tab(col UInt32, col_alias ALIAS col + 1, INDEX idx (col_alias) TYPE minmax) ENGINE = MergeTree ORDER BY col;`. [#57546](https://github.com/ClickHouse/ClickHouse/pull/57546) ([Robert Schulze](https://github.com/rschu1ze)). +* Added a new setting `readonly` which can be used to specify an S3 disk is read only. It can be useful to create a table on a disk of `s3_plain` type, while having read only access to the underlying S3 bucket. [#57977](https://github.com/ClickHouse/ClickHouse/pull/57977) ([Pengyuan Bian](https://github.com/bianpengyuan)). +* The primary key analysis in MergeTree tables will now be applied to predicates that include the virtual column `_part_offset` (optionally with `_part`). 
This feature can serve as a special kind of a secondary index. [#58224](https://github.com/ClickHouse/ClickHouse/pull/58224) ([Amos Bird](https://github.com/amosbird)). + +#### Performance Improvement +* Extract non-intersecting parts ranges from MergeTree table during FINAL processing. That way we can avoid additional FINAL logic for this non-intersecting parts ranges. In case when amount of duplicate values with same primary key is low, performance will be almost the same as without FINAL. Improve reading performance for MergeTree FINAL when `do_not_merge_across_partitions_select_final` setting is set. [#58120](https://github.com/ClickHouse/ClickHouse/pull/58120) ([Maksim Kita](https://github.com/kitaisreal)). +* Made copy between s3 disks using a s3-server-side copy instead of copying through the buffer. Improves `BACKUP/RESTORE` operations and `clickhouse-disks copy` command. [#56744](https://github.com/ClickHouse/ClickHouse/pull/56744) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Hash JOIN respects setting `max_joined_block_size_rows` and do not produce large blocks for `ALL JOIN`. [#56996](https://github.com/ClickHouse/ClickHouse/pull/56996) ([vdimir](https://github.com/vdimir)). +* Release memory for aggregation earlier. This may avoid unnecessary external aggregation. [#57691](https://github.com/ClickHouse/ClickHouse/pull/57691) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Improve performance of string serialization. [#57717](https://github.com/ClickHouse/ClickHouse/pull/57717) ([Maksim Kita](https://github.com/kitaisreal)). +* Support trivial count optimization for `Merge`-engine tables. [#57867](https://github.com/ClickHouse/ClickHouse/pull/57867) ([skyoct](https://github.com/skyoct)). +* Optimized aggregation in some cases. [#57872](https://github.com/ClickHouse/ClickHouse/pull/57872) ([Anton Popov](https://github.com/CurtizJ)). +* The `hasAny` function can now take advantage of the full-text skipping indices. [#57878](https://github.com/ClickHouse/ClickHouse/pull/57878) ([Jpnock](https://github.com/Jpnock)). +* Function `if(cond, then, else)` (and its alias `cond ? then : else`) were optimized to use branch-free evaluation. [#57885](https://github.com/ClickHouse/ClickHouse/pull/57885) ([zhanglistar](https://github.com/zhanglistar)). +* MergeTree automatically derive `do_not_merge_across_partitions_select_final` setting if partition key expression contains only columns from primary key expression. [#58218](https://github.com/ClickHouse/ClickHouse/pull/58218) ([Maksim Kita](https://github.com/kitaisreal)). +* Speedup `MIN` and `MAX` for native types. [#58231](https://github.com/ClickHouse/ClickHouse/pull/58231) ([Raúl Marín](https://github.com/Algunenano)). +* Implement `SLRU` cache policy for filesystem cache. [#57076](https://github.com/ClickHouse/ClickHouse/pull/57076) ([Kseniia Sumarokova](https://github.com/kssenii)). +* The limit for the number of connections per endpoint for background fetches was raised from `15` to the value of `background_fetches_pool_size` setting. - MergeTree-level setting `replicated_max_parallel_fetches_for_host` became obsolete - MergeTree-level settings `replicated_fetches_http_connection_timeout`, `replicated_fetches_http_send_timeout` and `replicated_fetches_http_receive_timeout` are moved to the Server-level. - Setting `keep_alive_timeout` is added to the list of Server-level settings. [#57523](https://github.com/ClickHouse/ClickHouse/pull/57523) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Make querying `system.filesystem_cache` not memory intensive. [#57687](https://github.com/ClickHouse/ClickHouse/pull/57687) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Reduce memory usage on strings deserialization. [#57787](https://github.com/ClickHouse/ClickHouse/pull/57787) ([Maksim Kita](https://github.com/kitaisreal)). +* More efficient constructor for Enum - it makes sense when Enum has a boatload of values. [#57887](https://github.com/ClickHouse/ClickHouse/pull/57887) ([Duc Canh Le](https://github.com/canhld94)). +* An improvement for reading from the filesystem cache: always use `pread` method. [#57970](https://github.com/ClickHouse/ClickHouse/pull/57970) ([Nikita Taranov](https://github.com/nickitat)). +* Add optimization for AND notEquals chain in logical expression optimizer. This optimization is only available with the experimental Analyzer enabled. [#58214](https://github.com/ClickHouse/ClickHouse/pull/58214) ([Kevin Mingtarja](https://github.com/kevinmingtarja)). + +#### Improvement +* Support for soft memory limit in Keeper. It will refuse requests if the memory usage is close to the maximum. [#57271](https://github.com/ClickHouse/ClickHouse/pull/57271) ([Han Fei](https://github.com/hanfei1991)). [#57699](https://github.com/ClickHouse/ClickHouse/pull/57699) ([Han Fei](https://github.com/hanfei1991)). +* Make inserts into distributed tables handle updated cluster configuration properly. When the list of cluster nodes is dynamically updated, the Directory Monitor of the distribution table will update it. [#42826](https://github.com/ClickHouse/ClickHouse/pull/42826) ([zhongyuankai](https://github.com/zhongyuankai)). +* Do not allow creating a replicated table with inconsistent merge parameters. [#56833](https://github.com/ClickHouse/ClickHouse/pull/56833) ([Duc Canh Le](https://github.com/canhld94)). +* Show uncompressed size in `system.tables`. [#56618](https://github.com/ClickHouse/ClickHouse/issues/56618). [#57186](https://github.com/ClickHouse/ClickHouse/pull/57186) ([Chen Lixiang](https://github.com/chenlx0)). +* Add `skip_unavailable_shards` as a setting for `Distributed` tables that is similar to the corresponding query-level setting. Closes [#43666](https://github.com/ClickHouse/ClickHouse/issues/43666). [#57218](https://github.com/ClickHouse/ClickHouse/pull/57218) ([Gagan Goel](https://github.com/tntnatbry)). +* The function `substring` (aliases: `substr`, `mid`) can now be used with `Enum` types. Previously, the first function argument had to be a value of type `String` or `FixedString`. This improves compatibility with 3rd party tools such as Tableau via MySQL interface. [#57277](https://github.com/ClickHouse/ClickHouse/pull/57277) ([Serge Klochkov](https://github.com/slvrtrn)). +* Function `format` now supports arbitrary argument types (instead of only `String` and `FixedString` arguments). This is important to calculate `SELECT format('The {0} to all questions is {1}', 'answer', 42)`. [#57549](https://github.com/ClickHouse/ClickHouse/pull/57549) ([Robert Schulze](https://github.com/rschu1ze)). +* Allows to use the `date_trunc` function with a case-insensitive first argument. Both cases are now supported: `SELECT date_trunc('day', now())` and `SELECT date_trunc('DAY', now())`. [#57624](https://github.com/ClickHouse/ClickHouse/pull/57624) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Better hints when a table doesn't exist. [#57342](https://github.com/ClickHouse/ClickHouse/pull/57342) ([Bharat Nallan](https://github.com/bharatnc)). 
+* Allow to overwrite `max_partition_size_to_drop` and `max_table_size_to_drop` server settings at query time. [#57452](https://github.com/ClickHouse/ClickHouse/pull/57452) ([Jordi Villar](https://github.com/jrdi)). +* Slightly better inference of unnamed tuples in JSON formats. [#57751](https://github.com/ClickHouse/ClickHouse/pull/57751) ([Kruglov Pavel](https://github.com/Avogar)). +* Add support for the read-only flag when connecting to Keeper (fixes [#53749](https://github.com/ClickHouse/ClickHouse/issues/53749)). [#57479](https://github.com/ClickHouse/ClickHouse/pull/57479) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Fix possible distributed sends getting stuck due to "No such file or directory" (during recovering a batch from disk). Fix possible issues with `error_count` from `system.distribution_queue` (in case of `distributed_directory_monitor_max_sleep_time_ms` >5min). Introduce a profile event to track async INSERT failures - `DistributedAsyncInsertionFailures`. [#57480](https://github.com/ClickHouse/ClickHouse/pull/57480) ([Azat Khuzhin](https://github.com/azat)). +* Support PostgreSQL generated columns and default column values in `MaterializedPostgreSQL` (experimental feature). Closes [#40449](https://github.com/ClickHouse/ClickHouse/issues/40449). [#57568](https://github.com/ClickHouse/ClickHouse/pull/57568) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to apply some filesystem cache config settings changes without a server restart. [#57578](https://github.com/ClickHouse/ClickHouse/pull/57578) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Properly handle PostgreSQL table structures with an empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot](https://github.com/myrrc)). +* Expose the total number of errors that have occurred since the last server restart as a `ClickHouseErrorMetric_ALL` metric. [#57627](https://github.com/ClickHouse/ClickHouse/pull/57627) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow nodes in the configuration file with a `from_env`/`from_zk` reference and a non-empty element with replace=1. [#57628](https://github.com/ClickHouse/ClickHouse/pull/57628) ([Azat Khuzhin](https://github.com/azat)). +* Add a table function `fuzzJSON` which allows generating a lot of malformed JSON for fuzzing. [#57646](https://github.com/ClickHouse/ClickHouse/pull/57646) ([Julia Kartseva](https://github.com/jkartseva)). +* Allow IPv6 to UInt128 conversion and binary arithmetic. [#57707](https://github.com/ClickHouse/ClickHouse/pull/57707) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add a setting for the async inserts deduplication cache: how long we wait for the cache update. Deprecate the setting `async_block_ids_cache_min_update_interval_ms`. Now the cache is updated only in case of conflicts. [#57743](https://github.com/ClickHouse/ClickHouse/pull/57743) ([alesapin](https://github.com/alesapin)). +* The `sleep()` function can now be cancelled with `KILL QUERY`. [#57746](https://github.com/ClickHouse/ClickHouse/pull/57746) ([Vitaly Baranov](https://github.com/vitlibar)). +* Forbid `CREATE TABLE ... AS SELECT` queries for `Replicated` table engines in the experimental `Replicated` database because they are not supported. Reference [#35408](https://github.com/ClickHouse/ClickHouse/issues/35408). [#57796](https://github.com/ClickHouse/ClickHouse/pull/57796) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix and improve transforming queries for external databases, to recursively obtain all compatible predicates.
[#57888](https://github.com/ClickHouse/ClickHouse/pull/57888) ([flynn](https://github.com/ucasfl)). +* Support dynamic reloading of the filesystem cache size. Closes [#57866](https://github.com/ClickHouse/ClickHouse/issues/57866). [#57897](https://github.com/ClickHouse/ClickHouse/pull/57897) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly support `system.stack_trace` for threads with blocked SIGRTMIN (these threads can exist in low-quality external libraries such as Apache rdkafka). [#57907](https://github.com/ClickHouse/ClickHouse/pull/57907) ([Azat Khuzhin](https://github.com/azat)). Also, send the signal to a thread only if it is not blocked, to avoid waiting `storage_system_stack_trace_pipe_read_timeout_ms` when it does not make any sense. [#58136](https://github.com/ClickHouse/ClickHouse/pull/58136) ([Azat Khuzhin](https://github.com/azat)). +* Tolerate keeper failures in the quorum inserts' check. [#57986](https://github.com/ClickHouse/ClickHouse/pull/57986) ([Raúl Marín](https://github.com/Algunenano)). +* Add max/peak RSS (`MemoryResidentMax`) into `system.asynchronous_metrics`. [#58095](https://github.com/ClickHouse/ClickHouse/pull/58095) ([Azat Khuzhin](https://github.com/azat)). +* Allow users to use S3-style links (`https://` and `s3://`) without specifying the region if it is not the default one. Also find the correct region if the user specified the wrong one. [#58148](https://github.com/ClickHouse/ClickHouse/pull/58148) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* `clickhouse-format --obfuscate` will know about Settings, MergeTreeSettings, and time zones and keep their names unchanged. [#58179](https://github.com/ClickHouse/ClickHouse/pull/58179) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added an explicit `finalize()` function in `ZipArchiveWriter`. Simplify too complicated code in `ZipArchiveWriter`. This fixes [#58074](https://github.com/ClickHouse/ClickHouse/issues/58074). [#58202](https://github.com/ClickHouse/ClickHouse/pull/58202) ([Vitaly Baranov](https://github.com/vitlibar)). +* Make caches with the same path use the same cache objects. This behaviour existed before, but was broken in 23.4. If such caches with the same path have a different set of cache settings, an exception will be thrown stating that this is not allowed. [#58264](https://github.com/ClickHouse/ClickHouse/pull/58264) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Parallel replicas (experimental feature): friendly settings [#57542](https://github.com/ClickHouse/ClickHouse/pull/57542) ([Igor Nikonov](https://github.com/devcrafter)). +* Parallel replicas (experimental feature): announcement response handling improvement [#57749](https://github.com/ClickHouse/ClickHouse/pull/57749) ([Igor Nikonov](https://github.com/devcrafter)). +* Parallel replicas (experimental feature): give more respect to `min_number_of_marks` in `ParallelReplicasReadingCoordinator` [#57763](https://github.com/ClickHouse/ClickHouse/pull/57763) ([Nikita Taranov](https://github.com/nickitat)). +* Parallel replicas (experimental feature): disable parallel replicas with IN (subquery) [#58133](https://github.com/ClickHouse/ClickHouse/pull/58133) ([Igor Nikonov](https://github.com/devcrafter)). +* Parallel replicas (experimental feature): add profile event 'ParallelReplicasUsedCount' [#58173](https://github.com/ClickHouse/ClickHouse/pull/58173) ([Igor Nikonov](https://github.com/devcrafter)). +* Non-POST requests such as HEAD will be read-only, similar to GET.
[#58060](https://github.com/ClickHouse/ClickHouse/pull/58060) ([San](https://github.com/santrancisco)). +* Add `bytes_uncompressed` column to `system.part_log` [#58167](https://github.com/ClickHouse/ClickHouse/pull/58167) ([Jordi Villar](https://github.com/jrdi)). +* Add base backup name to `system.backups` and `system.backup_log` tables [#58178](https://github.com/ClickHouse/ClickHouse/pull/58178) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Add support for specifying query parameters in the command line in clickhouse-local [#58210](https://github.com/ClickHouse/ClickHouse/pull/58210) ([Pradeep Chhetri](https://github.com/chhetripradeep)). + +#### Build/Testing/Packaging Improvement +* Randomize more settings [#39663](https://github.com/ClickHouse/ClickHouse/pull/39663) ([Anton Popov](https://github.com/CurtizJ)). +* Randomize disabled optimizations in CI [#57315](https://github.com/ClickHouse/ClickHouse/pull/57315) ([Raúl Marín](https://github.com/Algunenano)). +* Allow usage of Azure-related table engines/functions on macOS. [#51866](https://github.com/ClickHouse/ClickHouse/pull/51866) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* ClickHouse Fast Test now uses Musl instead of GLibc. [#57711](https://github.com/ClickHouse/ClickHouse/pull/57711) ([Alexey Milovidov](https://github.com/alexey-milovidov)). The fully-static Musl build is available to download from the CI. +* Run ClickBench for every commit. This closes [#57708](https://github.com/ClickHouse/ClickHouse/issues/57708). [#57712](https://github.com/ClickHouse/ClickHouse/pull/57712) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove the usage of a harmful C/POSIX `select` function from external libraries. [#57467](https://github.com/ClickHouse/ClickHouse/pull/57467) ([Igor Nikonov](https://github.com/devcrafter)). +* Settings only available in ClickHouse Cloud will be also present in the open-source ClickHouse build for convenience. [#57638](https://github.com/ClickHouse/ClickHouse/pull/57638) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fixed a possibility of sorting order breakage in TTL GROUP BY [#49103](https://github.com/ClickHouse/ClickHouse/pull/49103) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix: split `lttb` bucket strategy, first bucket and last bucket should only contain single point [#57003](https://github.com/ClickHouse/ClickHouse/pull/57003) ([FFish](https://github.com/wxybear)). +* Fix possible deadlock in the `Template` format during sync after error [#57004](https://github.com/ClickHouse/ClickHouse/pull/57004) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix early stop while parsing a file with skipping lots of errors [#57006](https://github.com/ClickHouse/ClickHouse/pull/57006) ([Kruglov Pavel](https://github.com/Avogar)). +* Prevent dictionary's ACL bypass via the `dictionary` table function [#57362](https://github.com/ClickHouse/ClickHouse/pull/57362) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fix another case of a "non-ready set" error found by Fuzzer. [#57423](https://github.com/ClickHouse/ClickHouse/pull/57423) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix several issues regarding PostgreSQL `array_ndims` usage. [#57436](https://github.com/ClickHouse/ClickHouse/pull/57436) ([Ryan Jacobs](https://github.com/ryanmjacobs)). 
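For the new `bytes_uncompressed` column in `system.part_log` mentioned above, a possible inspection query might look like the sketch below; the other columns referenced already exist in `system.part_log`, but treat this as an illustration rather than a prescribed query:

```sql
-- Recent part events together with their uncompressed size.
SELECT event_time, event_type, table, part_name, bytes_uncompressed
FROM system.part_log
ORDER BY event_time DESC
LIMIT 10;
```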
+* Fix RWLock inconsistency after write lock timeout [#57454](https://github.com/ClickHouse/ClickHouse/pull/57454) ([Vitaly Baranov](https://github.com/vitlibar)). Fix RWLock inconsistency after write lock timeout (again) [#57733](https://github.com/ClickHouse/ClickHouse/pull/57733) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix: don't exclude ephemeral column when building pushing to view chain [#57461](https://github.com/ClickHouse/ClickHouse/pull/57461) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* MaterializedPostgreSQL (experimental issue): fix issue [#41922](https://github.com/ClickHouse/ClickHouse/issues/41922), add test for [#41923](https://github.com/ClickHouse/ClickHouse/issues/41923) [#57515](https://github.com/ClickHouse/ClickHouse/pull/57515) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities. [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix crash in clickhouse-local [#57553](https://github.com/ClickHouse/ClickHouse/pull/57553) ([Nikolay Degterinsky](https://github.com/evillique)). +* A fix for Hash JOIN. [#57564](https://github.com/ClickHouse/ClickHouse/pull/57564) ([vdimir](https://github.com/vdimir)). +* Fix possible error in PostgreSQL source [#57567](https://github.com/ClickHouse/ClickHouse/pull/57567) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix type correction in Hash JOIN for nested LowCardinality. [#57614](https://github.com/ClickHouse/ClickHouse/pull/57614) ([vdimir](https://github.com/vdimir)). +* Avoid hangs of `system.stack_trace` by correctly prohibiting parallel reading from it. [#57641](https://github.com/ClickHouse/ClickHouse/pull/57641) ([Azat Khuzhin](https://github.com/azat)). +* Fix an error for aggregation of sparse columns with `any(...) RESPECT NULL` [#57710](https://github.com/ClickHouse/ClickHouse/pull/57710) ([Azat Khuzhin](https://github.com/azat)). +* Fix unary operators parsing [#57713](https://github.com/ClickHouse/ClickHouse/pull/57713) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix dependency loading for the experimental table engine `MaterializedPostgreSQL`. [#57754](https://github.com/ClickHouse/ClickHouse/pull/57754) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix retries for disconnected nodes for BACKUP/RESTORE ON CLUSTER [#57764](https://github.com/ClickHouse/ClickHouse/pull/57764) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix result of external aggregation in case of partially materialized projection [#57790](https://github.com/ClickHouse/ClickHouse/pull/57790) ([Anton Popov](https://github.com/CurtizJ)). +* Fix merge in aggregation functions with `*Map` combinator [#57795](https://github.com/ClickHouse/ClickHouse/pull/57795) ([Anton Popov](https://github.com/CurtizJ)). +* Disable `system.kafka_consumers` because it has a bug. [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)). +* Fix LowCardinality keys support in Merge JOIN. [#57827](https://github.com/ClickHouse/ClickHouse/pull/57827) ([vdimir](https://github.com/vdimir)). +* A fix for `InterpreterCreateQuery` related to the sample block. [#57855](https://github.com/ClickHouse/ClickHouse/pull/57855) ([Maksim Kita](https://github.com/kitaisreal)). +* `addresses_expr` were ignored for named collections from PostgreSQL. 
[#57874](https://github.com/ClickHouse/ClickHouse/pull/57874) ([joelynch](https://github.com/joelynch)). +* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)). Then it was rewritten from Rust to C++ for better [memory-safety](https://www.memorysafety.org/). [#57994](https://github.com/ClickHouse/ClickHouse/pull/57994) ([Raúl Marín](https://github.com/Algunenano)). +* Normalize function names in `CREATE INDEX` [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix handling of unavailable replicas before first request happened [#57933](https://github.com/ClickHouse/ClickHouse/pull/57933) ([Nikita Taranov](https://github.com/nickitat)). +* Fix literal alias misclassification [#57988](https://github.com/ClickHouse/ClickHouse/pull/57988) ([Chen768959](https://github.com/Chen768959)). +* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix integer overflow in the `Poco` library, related to `UTF32Encoding` [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)). +* Fix parallel replicas (experimental feature) in presence of a scalar subquery with a big integer value [#58118](https://github.com/ClickHouse/ClickHouse/pull/58118) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `accurateCastOrNull` for out-of-range `DateTime` [#58139](https://github.com/ClickHouse/ClickHouse/pull/58139) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix possible `PARAMETER_OUT_OF_BOUND` error during subcolumns reading from a wide part in MergeTree [#58175](https://github.com/ClickHouse/ClickHouse/pull/58175) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a slow-down of CREATE VIEW with an enormous number of subqueries [#58220](https://github.com/ClickHouse/ClickHouse/pull/58220) ([Tao Wang](https://github.com/wangtZJU)). +* Fix parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)). + + +### ClickHouse release 23.11, 2023-12-06 + +#### Backward Incompatible Change +* The default ClickHouse server configuration file has enabled `access_management` (user manipulation by SQL queries) and `named_collection_control` (manipulation of named collection by SQL queries) for the `default` user by default. This closes [#56482](https://github.com/ClickHouse/ClickHouse/issues/56482). [#56619](https://github.com/ClickHouse/ClickHouse/pull/56619) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Multiple improvements for `RESPECT NULLS`/`IGNORE NULLS` for window functions. If you use them as aggregate functions and store the states of aggregate functions with these modifiers, they might become incompatible. [#57189](https://github.com/ClickHouse/ClickHouse/pull/57189) ([Raúl Marín](https://github.com/Algunenano)). +* Remove optimization `optimize_move_functions_out_of_any`. [#57190](https://github.com/ClickHouse/ClickHouse/pull/57190) ([Raúl Marín](https://github.com/Algunenano)). +* Formatters `%l`/`%k`/`%c` in function `parseDateTime` are now able to parse hours/months without leading zeros, e.g. `select parseDateTime('2023-11-26 8:14', '%F %k:%i')` now works. 
Set `parsedatetime_parse_without_leading_zeros = 0` to restore the previous behavior which required two digits. Function `formatDateTime` is now also able to print hours/months without leading zeros. This is controlled by setting `formatdatetime_format_without_leading_zeros` but off by default to not break existing use cases. [#55872](https://github.com/ClickHouse/ClickHouse/pull/55872) ([Azat Khuzhin](https://github.com/azat)). +* You can no longer use the aggregate function `avgWeighted` with arguments of type `Decimal`. Workaround: convert arguments to `Float64`. This closes [#43928](https://github.com/ClickHouse/ClickHouse/issues/43928). This closes [#31768](https://github.com/ClickHouse/ClickHouse/issues/31768). This closes [#56435](https://github.com/ClickHouse/ClickHouse/issues/56435). If you have used this function inside materialized views or projections with `Decimal` arguments, contact support@clickhouse.com. Fixed error in aggregate function `sumMap` and made it slower around 1.5..2 times. It does not matter because the function is garbage anyway. This closes [#54955](https://github.com/ClickHouse/ClickHouse/issues/54955). This closes [#53134](https://github.com/ClickHouse/ClickHouse/issues/53134). This closes [#55148](https://github.com/ClickHouse/ClickHouse/issues/55148). Fix a bug in function `groupArraySample` - it used the same random seed in case more than one aggregate state is generated in a query. [#56350](https://github.com/ClickHouse/ClickHouse/pull/56350) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Added server setting `async_load_databases` for asynchronous loading of databases and tables. Speeds up the server start time. Applies to databases with `Ordinary`, `Atomic` and `Replicated` engines. Their tables load metadata asynchronously. Query to a table increases the priority of the load job and waits for it to be done. Added a new table `system.asynchronous_loader` for introspection. [#49351](https://github.com/ClickHouse/ClickHouse/pull/49351) ([Sergei Trifonov](https://github.com/serxa)). +* Add system table `blob_storage_log`. It allows auditing all the data written to S3 and other object storages. [#52918](https://github.com/ClickHouse/ClickHouse/pull/52918) ([vdimir](https://github.com/vdimir)). +* Use statistics to order prewhere conditions better. [#53240](https://github.com/ClickHouse/ClickHouse/pull/53240) ([Han Fei](https://github.com/hanfei1991)). +* Added support for compression in the Keeper's protocol. It can be enabled on the ClickHouse side by using this flag `use_compression` inside `zookeeper` section. Keep in mind that only ClickHouse Keeper supports compression, while Apache ZooKeeper does not. Resolves [#49507](https://github.com/ClickHouse/ClickHouse/issues/49507). [#54957](https://github.com/ClickHouse/ClickHouse/pull/54957) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Introduce the feature `storage_metadata_write_full_object_key`. If it is set as `true` then metadata files are written with the new format. With that format ClickHouse stores full remote object key in the metadata file which allows better flexibility and optimization. [#55566](https://github.com/ClickHouse/ClickHouse/pull/55566) ([Sema Checherinda](https://github.com/CheSema)). +* Add new settings and syntax to protect named collections' fields from being overridden. This is meant to prevent a malicious user from obtaining unauthorized access to secrets. 
[#55782](https://github.com/ClickHouse/ClickHouse/pull/55782) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Add `hostname` column to all system log tables - it is useful if you make the system tables replicated, shared, or distributed. [#55894](https://github.com/ClickHouse/ClickHouse/pull/55894) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `CHECK ALL TABLES` query. [#56022](https://github.com/ClickHouse/ClickHouse/pull/56022) ([vdimir](https://github.com/vdimir)). +* Added function `fromDaysSinceYearZero` which is similar to MySQL's `FROM_DAYS`. E.g. `SELECT fromDaysSinceYearZero(739136)` returns `2023-09-08`. [#56088](https://github.com/ClickHouse/ClickHouse/pull/56088) ([Joanna Hulboj](https://github.com/jh0x)). +* Add an external Python tool to view backups and to extract information from them without using ClickHouse. [#56268](https://github.com/ClickHouse/ClickHouse/pull/56268) ([Vitaly Baranov](https://github.com/vitlibar)). +* Implement a new setting called `preferred_optimize_projection_name`. If it is set to a non-empty string, the specified projection would be used if possible instead of choosing from all the candidates. [#56309](https://github.com/ClickHouse/ClickHouse/pull/56309) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add 4-letter command for yielding/resigning leadership (https://github.com/ClickHouse/ClickHouse/issues/56352). [#56354](https://github.com/ClickHouse/ClickHouse/pull/56354) ([Pradeep Chhetri](https://github.com/chhetripradeep)). [#56620](https://github.com/ClickHouse/ClickHouse/pull/56620) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Added a new SQL function, `arrayRandomSample(arr, k)` which returns a sample of k elements from the input array. Similar functionality could previously be achieved only with less convenient syntax, e.g. `SELECT arrayReduce('groupArraySample(3)', range(10))`. [#56416](https://github.com/ClickHouse/ClickHouse/pull/56416) ([Robert Schulze](https://github.com/rschu1ze)). +* Added support for `Float16` type data to use in `.npy` files. Closes [#56344](https://github.com/ClickHouse/ClickHouse/issues/56344). [#56424](https://github.com/ClickHouse/ClickHouse/pull/56424) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Added a system view `information_schema.statistics` for better compatibility with Tableau Online. [#56425](https://github.com/ClickHouse/ClickHouse/pull/56425) ([Serge Klochkov](https://github.com/slvrtrn)). +* Add `system.symbols` table useful for introspection of the binary. [#56548](https://github.com/ClickHouse/ClickHouse/pull/56548) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Configurable dashboards. Queries for charts are now loaded using a query, which by default uses a new `system.dashboards` table. [#56771](https://github.com/ClickHouse/ClickHouse/pull/56771) ([Sergei Trifonov](https://github.com/serxa)). +* Introduce `fileCluster` table function - it is useful if you mount a shared filesystem (NFS and similar) into the `user_files` directory. [#56868](https://github.com/ClickHouse/ClickHouse/pull/56868) ([Andrey Zvonov](https://github.com/zvonand)). +* Add `_size` virtual column with file size in bytes to `s3/file/hdfs/url/azureBlobStorage` engines. [#57126](https://github.com/ClickHouse/ClickHouse/pull/57126) ([Kruglov Pavel](https://github.com/Avogar)). +* Expose the number of errors for each error code occurred on a server since last restart from the Prometheus endpoint. 
[#57209](https://github.com/ClickHouse/ClickHouse/pull/57209) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* ClickHouse Keeper reports its running availability zone at the `/keeper/availability-zone` path. The zone (for example, `us-west-1a`) can be configured in the Keeper configuration. [#56715](https://github.com/ClickHouse/ClickHouse/pull/56715) ([Jianfei Hu](https://github.com/incfly)). +* Make `ALTER TABLE ... MODIFY QUERY` for materialized views non-experimental and deprecate the `allow_experimental_alter_materialized_view_structure` setting. Fixes [#15206](https://github.com/ClickHouse/ClickHouse/issues/15206). [#57311](https://github.com/ClickHouse/ClickHouse/pull/57311) ([alesapin](https://github.com/alesapin)). +* Setting `join_algorithm` respects the specified order of algorithms. [#51745](https://github.com/ClickHouse/ClickHouse/pull/51745) ([vdimir](https://github.com/vdimir)). +* Add support for the [well-known Protobuf types](https://protobuf.dev/reference/protobuf/google.protobuf/) in the Protobuf format. [#56741](https://github.com/ClickHouse/ClickHouse/pull/56741) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). + +#### Performance Improvement +* Adaptive timeouts for interacting with S3. The first attempt is made with low send and receive timeouts. [#56314](https://github.com/ClickHouse/ClickHouse/pull/56314) ([Sema Checherinda](https://github.com/CheSema)). +* Increase the default value of `max_concurrent_queries` from 100 to 1000. This makes sense when there is a large number of connecting clients, which are slowly sending or receiving data, so the server is not limited by CPU, or when the number of CPU cores is larger than 100. Also, enable the concurrency control by default, and set the desired number of query processing threads in total as twice the number of CPU cores. It improves performance in scenarios with a very large number of concurrent queries. [#46927](https://github.com/ClickHouse/ClickHouse/pull/46927) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support parallel evaluation of window functions. Fixes [#34688](https://github.com/ClickHouse/ClickHouse/issues/34688). [#39631](https://github.com/ClickHouse/ClickHouse/pull/39631) ([Dmitry Novik](https://github.com/novikd)). +* `Numbers` table engine (of the `system.numbers` table) now analyzes the condition to generate the needed subset of data, like a table's index. [#50909](https://github.com/ClickHouse/ClickHouse/pull/50909) ([JackyWoo](https://github.com/JackyWoo)). +* Improved the performance of filtering by `IN (...)` condition for the `Merge` table engine. [#54905](https://github.com/ClickHouse/ClickHouse/pull/54905) ([Nikita Taranov](https://github.com/nickitat)). +* An improvement which takes place when the filesystem cache is full and there are big reads. [#55158](https://github.com/ClickHouse/ClickHouse/pull/55158) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add the ability to disable checksums for S3 to avoid an excessive pass over the file (this is controlled by the setting `s3_disable_checksum`). [#55559](https://github.com/ClickHouse/ClickHouse/pull/55559) ([Azat Khuzhin](https://github.com/azat)). +* Now we read synchronously from remote tables when data is in the page cache (like we do for local tables). It is faster, doesn't require synchronisation inside the thread pool, doesn't hesitate to do `seek`-s on the local FS, and reduces CPU wait. [#55841](https://github.com/ClickHouse/ClickHouse/pull/55841) ([Nikita Taranov](https://github.com/nickitat)). +* Optimization for getting values from `map` and `arrayElement`.
It brings about a 30% speedup by reducing the reserved memory and the number of `resize` calls. [#55957](https://github.com/ClickHouse/ClickHouse/pull/55957) ([lgbo](https://github.com/lgbo-ustc)). +* Optimization of multi-stage filtering with AVX-512. Performance experiments with the OnTime dataset on an ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring improvements of 7.4%, 5.9%, 4.7%, 3.0%, and 4.6% to the QPS of queries Q2, Q3, Q4, Q5 and Q6 respectively, while having no impact on others. [#56079](https://github.com/ClickHouse/ClickHouse/pull/56079) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Limit the number of threads busy inside the query profiler. If there are more, they will skip profiling. [#56105](https://github.com/ClickHouse/ClickHouse/pull/56105) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Decrease the number of virtual function calls in window functions. [#56120](https://github.com/ClickHouse/ClickHouse/pull/56120) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow recursive Tuple field pruning in the ORC data format to speed up scanning. [#56122](https://github.com/ClickHouse/ClickHouse/pull/56122) ([李扬](https://github.com/taiyang-li)). +* Trivial count optimization for the `Npy` data format: queries like `select count() from 'data.npy'` will work much faster thanks to caching of the results. [#56304](https://github.com/ClickHouse/ClickHouse/pull/56304) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Queries with aggregation and a large number of streams will use less memory during the query plan's construction. [#57074](https://github.com/ClickHouse/ClickHouse/pull/57074) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve performance of executing queries for use cases with many users and highly concurrent queries (>2000 QPS) by optimizing the access to ProcessList. [#57106](https://github.com/ClickHouse/ClickHouse/pull/57106) ([Andrej Hoos](https://github.com/adikus)). +* Trivial improvement of array join: reuse some intermediate results. [#57183](https://github.com/ClickHouse/ClickHouse/pull/57183) ([李扬](https://github.com/taiyang-li)). +* There are cases when stack unwinding was slow. Not anymore. [#57221](https://github.com/ClickHouse/ClickHouse/pull/57221) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now we use the default read pool for reading from external storage when `max_streams = 1`. It is beneficial when read prefetches are enabled. [#57334](https://github.com/ClickHouse/ClickHouse/pull/57334) ([Nikita Taranov](https://github.com/nickitat)). +* Keeper improvement: improve memory usage during startup by delaying log preprocessing. [#55660](https://github.com/ClickHouse/ClickHouse/pull/55660) ([Antonio Andelic](https://github.com/antonio2368)). +* Improved performance of glob matching for `File` and `HDFS` storages. [#56141](https://github.com/ClickHouse/ClickHouse/pull/56141) ([Andrey Zvonov](https://github.com/zvonand)). +* Posting lists in experimental full text indexes are now compressed, which reduces their size by 10-30%. [#56226](https://github.com/ClickHouse/ClickHouse/pull/56226) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Parallelise `BackupEntriesCollector` in backups. [#56312](https://github.com/ClickHouse/ClickHouse/pull/56312) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Improvement +* Add a new `MergeTree` setting `add_implicit_sign_column_constraint_for_collapsing_engine` (disabled by default).
When enabled, it adds an implicit CHECK constraint for `CollapsingMergeTree` tables that restricts the value of the `Sign` column to be only -1 or 1. [#56701](https://github.com/ClickHouse/ClickHouse/issues/56701). [#56986](https://github.com/ClickHouse/ClickHouse/pull/56986) ([Kevin Mingtarja](https://github.com/kevinmingtarja)). +* Enable adding a new disk to the storage configuration without a restart. [#56367](https://github.com/ClickHouse/ClickHouse/pull/56367) ([Duc Canh Le](https://github.com/canhld94)). +* Support creating and materializing an index in the same ALTER query; also support "modify TTL" and "materialize TTL" in the same query. Closes [#55651](https://github.com/ClickHouse/ClickHouse/issues/55651). [#56331](https://github.com/ClickHouse/ClickHouse/pull/56331) ([flynn](https://github.com/ucasfl)). +* Add a new table function named `fuzzJSON` with rows containing perturbed versions of the source JSON string with random variations. [#56490](https://github.com/ClickHouse/ClickHouse/pull/56490) ([Julia Kartseva](https://github.com/jkartseva)). +* Engine `Merge` filters the records according to the row policies of the underlying tables, so you don't have to create another row policy on a `Merge` table. [#50209](https://github.com/ClickHouse/ClickHouse/pull/50209) ([Ilya Golshtein](https://github.com/ilejn)). +* Add a setting `max_execution_time_leaf` to limit the execution time on shards for distributed queries, and `timeout_overflow_mode_leaf` to control the behaviour if the timeout happens. [#51823](https://github.com/ClickHouse/ClickHouse/pull/51823) ([Duc Canh Le](https://github.com/canhld94)). +* Add a ClickHouse setting to disable tunneling for HTTPS requests over an HTTP proxy. [#55033](https://github.com/ClickHouse/ClickHouse/pull/55033) ([Arthur Passos](https://github.com/arthurpassos)). +* Set `background_fetches_pool_size` to 16 and `background_schedule_pool_size` to 512, which is better for production usage with frequent small insertions. [#54327](https://github.com/ClickHouse/ClickHouse/pull/54327) ([Denny Crane](https://github.com/den-crane)). +* Previously, when reading data from a CSV file whose line ends with `\r` not followed by `\n`, ClickHouse threw the exception `Cannot parse CSV format: found \r (CR) not followed by \n (LF). Line must end by \n (LF) or \r\n (CR LF) or \n\r.` In ClickHouse, a CSV line must end with `\n`, `\r\n` or `\n\r`, so a `\r` must be followed by `\n`; however, in some situations the CSV input data is abnormal and `\r` appears at the end of a line. Such input can now be handled. [#54340](https://github.com/ClickHouse/ClickHouse/pull/54340) ([KevinyhZou](https://github.com/KevinyhZou)). +* Update the Arrow library to release-13.0.0, which supports new encodings. Closes [#44505](https://github.com/ClickHouse/ClickHouse/issues/44505). [#54800](https://github.com/ClickHouse/ClickHouse/pull/54800) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve performance of ON CLUSTER queries by removing heavy system calls to get all network interfaces when looking for the local IP address in the DDL entry hosts list. [#54909](https://github.com/ClickHouse/ClickHouse/pull/54909) ([Duc Canh Le](https://github.com/canhld94)). +* Fixed accounting of memory allocated before attaching a thread to a query or a user. [#56089](https://github.com/ClickHouse/ClickHouse/pull/56089) ([Nikita Taranov](https://github.com/nickitat)). +* Add support for `LARGE_LIST` in Apache Arrow formats. [#56118](https://github.com/ClickHouse/ClickHouse/pull/56118) ([edef](https://github.com/edef1c)).
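A sketch of the new `add_implicit_sign_column_constraint_for_collapsing_engine` setting described at the start of this group; the table definition and column names are made up for illustration:

```sql
CREATE TABLE page_visits
(
    page_id UInt64,
    visits  UInt32,
    Sign    Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY page_id
SETTINGS add_implicit_sign_column_constraint_for_collapsing_engine = 1;

-- With the setting enabled, the implicit CHECK constraint rejects inserts
-- where Sign is anything other than -1 or 1.
```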
+* Allow manual compaction of `EmbeddedRocksDB` via `OPTIMIZE` query. [#56225](https://github.com/ClickHouse/ClickHouse/pull/56225) ([Azat Khuzhin](https://github.com/azat)). +* Add ability to specify BlockBasedTableOptions for `EmbeddedRocksDB` tables. [#56264](https://github.com/ClickHouse/ClickHouse/pull/56264) ([Azat Khuzhin](https://github.com/azat)). +* `SHOW COLUMNS` now displays MySQL's equivalent data type name when the connection was made through the MySQL protocol. Previously, this was the case when setting `use_mysql_types_in_show_columns = 1`. The setting is retained but made obsolete. [#56277](https://github.com/ClickHouse/ClickHouse/pull/56277) ([Robert Schulze](https://github.com/rschu1ze)). +* Fixed possible `The local set of parts of table doesn't look like the set of parts in ZooKeeper` error if server was restarted just after `TRUNCATE` or `DROP PARTITION`. [#56282](https://github.com/ClickHouse/ClickHouse/pull/56282) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed handling of non-const query strings in functions `formatQuery`/ `formatQuerySingleLine`. Also added `OrNull` variants of both functions that return a NULL when a query cannot be parsed instead of throwing an exception. [#56327](https://github.com/ClickHouse/ClickHouse/pull/56327) ([Robert Schulze](https://github.com/rschu1ze)). +* Allow backup of materialized view with dropped inner table instead of failing the backup. [#56387](https://github.com/ClickHouse/ClickHouse/pull/56387) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Queries to `system.replicas` initiate requests to ZooKeeper when certain columns are queried. When there are thousands of tables these requests might produce a considerable load on ZooKeeper. If there are multiple simultaneous queries to `system.replicas` they do same requests multiple times. The change is to "deduplicate" requests from concurrent queries. [#56420](https://github.com/ClickHouse/ClickHouse/pull/56420) ([Alexander Gololobov](https://github.com/davenger)). +* Fix translation to MySQL compatible query for querying external databases. [#56456](https://github.com/ClickHouse/ClickHouse/pull/56456) ([flynn](https://github.com/ucasfl)). +* Add support for backing up and restoring tables using `KeeperMap` engine. [#56460](https://github.com/ClickHouse/ClickHouse/pull/56460) ([Antonio Andelic](https://github.com/antonio2368)). +* 404 response for CompleteMultipartUpload has to be rechecked. Operation could be done on server even if client got timeout or other network errors. The next retry of CompleteMultipartUpload receives 404 response. If the object key exists that operation is considered as successful. [#56475](https://github.com/ClickHouse/ClickHouse/pull/56475) ([Sema Checherinda](https://github.com/CheSema)). +* Enable the HTTP OPTIONS method by default - it simplifies requesting ClickHouse from a web browser. [#56483](https://github.com/ClickHouse/ClickHouse/pull/56483) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The value for `dns_max_consecutive_failures` was changed by mistake in [#46550](https://github.com/ClickHouse/ClickHouse/issues/46550) - this is reverted and adjusted to a better value. Also, increased the HTTP keep-alive timeout to a reasonable value from production. [#56485](https://github.com/ClickHouse/ClickHouse/pull/56485) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Load base backups lazily (a base backup won't be loaded until it's needed). Also add some log message and profile events for backups. 
[#56516](https://github.com/ClickHouse/ClickHouse/pull/56516) ([Vitaly Baranov](https://github.com/vitlibar)). +* Setting `query_cache_store_results_of_queries_with_nondeterministic_functions` (with values `false` or `true`) was marked obsolete. It was replaced by setting `query_cache_nondeterministic_function_handling`, a three-valued enum that controls how the query cache handles queries with non-deterministic functions: a) throw an exception (default behavior), b) save the non-deterministic query result regardless, or c) ignore, i.e. don't throw an exception and don't cache the result. [#56519](https://github.com/ClickHouse/ClickHouse/pull/56519) ([Robert Schulze](https://github.com/rschu1ze)). +* Rewrite equality with `is null` check in the JOIN ON section. Experimental *Analyzer only*. [#56538](https://github.com/ClickHouse/ClickHouse/pull/56538) ([vdimir](https://github.com/vdimir)). +* Function `concat` now supports arbitrary argument types (instead of only String and FixedString arguments). This makes it behave more similarly to MySQL's `concat` implementation. For example, `SELECT concat('ab', 42)` now returns `ab42`. [#56540](https://github.com/ClickHouse/ClickHouse/pull/56540) ([Serge Klochkov](https://github.com/slvrtrn)). +* Allow getting cache configuration from the 'named_collection' section in the config or from SQL-created named collections. [#56541](https://github.com/ClickHouse/ClickHouse/pull/56541) ([Kseniia Sumarokova](https://github.com/kssenii)). +* PostgreSQL database engine: make the removal of outdated tables less aggressive when the PostgreSQL connection is unsuccessful. [#56609](https://github.com/ClickHouse/ClickHouse/pull/56609) ([jsc0218](https://github.com/jsc0218)). +* Fixed an issue where connecting to PostgreSQL took too much time when the URL was not right, causing the relevant query to get stuck there until it was cancelled. [#56648](https://github.com/ClickHouse/ClickHouse/pull/56648) ([jsc0218](https://github.com/jsc0218)). +* Keeper improvement: disable compressed logs by default in Keeper. [#56763](https://github.com/ClickHouse/ClickHouse/pull/56763) ([Antonio Andelic](https://github.com/antonio2368)). +* Add config setting `wait_dictionaries_load_at_startup`. [#56782](https://github.com/ClickHouse/ClickHouse/pull/56782) ([Vitaly Baranov](https://github.com/vitlibar)). +* There was a potential vulnerability in previous ClickHouse versions: if a user has connected and unsuccessfully tried to authenticate with the "interserver secret" method, the server didn't terminate the connection immediately but continued to receive and ignore the leftover packets from the client. While these packets are ignored, they are still parsed, and if they use a compression method with another known vulnerability, it will lead to exploitation of it without authentication. This issue was found with [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#56794](https://github.com/ClickHouse/ClickHouse/pull/56794) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fetching a part now waits until that part is fully committed on the remote replica. It is better not to send a part in the PreActive state. In case of zero-copy replication this is a mandatory restriction. [#56808](https://github.com/ClickHouse/ClickHouse/pull/56808) ([Sema Checherinda](https://github.com/CheSema)). +* Fix a possible PostgreSQL logical replication conversion error when using the experimental `MaterializedPostgreSQL`. [#53721](https://github.com/ClickHouse/ClickHouse/pull/53721) ([takakawa](https://github.com/takakawa)).
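Illustrating the `concat` change above; the example call and its result are taken directly from the entry:

```sql
-- concat() now accepts non-string arguments and converts them to text.
SELECT concat('ab', 42);  -- 'ab42'
```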
+* Implement user-level setting `alter_move_to_space_execute_async` which allows executing `ALTER TABLE ... MOVE PARTITION|PART TO DISK|VOLUME` queries asynchronously. The size of the pool for background executions is controlled by `background_move_pool_size`. The default behavior is synchronous execution. Fixes [#47643](https://github.com/ClickHouse/ClickHouse/issues/47643). [#56809](https://github.com/ClickHouse/ClickHouse/pull/56809) ([alesapin](https://github.com/alesapin)). +* Allow filtering by engine when scanning `system.tables`, avoiding unnecessary (potentially time-consuming) connections. [#56813](https://github.com/ClickHouse/ClickHouse/pull/56813) ([jsc0218](https://github.com/jsc0218)). +* Show `total_bytes` and `total_rows` in system tables for RocksDB storage. [#56816](https://github.com/ClickHouse/ClickHouse/pull/56816) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Allow basic commands in ALTER for TEMPORARY tables. [#56892](https://github.com/ClickHouse/ClickHouse/pull/56892) ([Sergey](https://github.com/icuken)). +* LZ4 compression: buffer the compressed block in a rare case when the output buffer capacity is not enough to write the compressed block directly into it. [#56938](https://github.com/ClickHouse/ClickHouse/pull/56938) ([Sema Checherinda](https://github.com/CheSema)). +* Add metrics for the number of queued jobs, which is useful for the IO thread pool. [#56958](https://github.com/ClickHouse/ClickHouse/pull/56958) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a setting for the PostgreSQL table engine in the config file. Added a check for the setting and documentation around it. [#56959](https://github.com/ClickHouse/ClickHouse/pull/56959) ([Peignon Melvyn](https://github.com/melvynator)). +* Function `concat` can now be called with a single argument, e.g., `SELECT concat('abc')`. This makes its behavior more consistent with MySQL's concat implementation. [#57000](https://github.com/ClickHouse/ClickHouse/pull/57000) ([Serge Klochkov](https://github.com/slvrtrn)). +* Sign all `x-amz-*` headers as required by the AWS S3 docs. [#57001](https://github.com/ClickHouse/ClickHouse/pull/57001) ([Arthur Passos](https://github.com/arthurpassos)). +* Function `fromDaysSinceYearZero` (alias: `FROM_DAYS`) can now be used with unsigned and signed integer types (previously, it had to be an unsigned integer). This improves compatibility with 3rd party tools such as Tableau Online. [#57002](https://github.com/ClickHouse/ClickHouse/pull/57002) ([Serge Klochkov](https://github.com/slvrtrn)). +* Add `system.s3queue_log` to the default config. [#57036](https://github.com/ClickHouse/ClickHouse/pull/57036) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Change the default for `wait_dictionaries_load_at_startup` to true, and use this setting only if `dictionaries_lazy_load` is false. [#57133](https://github.com/ClickHouse/ClickHouse/pull/57133) ([Vitaly Baranov](https://github.com/vitlibar)). +* Check the dictionary source type on creation even if `dictionaries_lazy_load` is enabled. [#57134](https://github.com/ClickHouse/ClickHouse/pull/57134) ([Vitaly Baranov](https://github.com/vitlibar)). +* Plan-level optimizations can now be enabled/disabled individually. Previously, it was only possible to disable them all. The setting which previously did that (`query_plan_enable_optimizations`) is retained and can still be used to disable all optimizations.
[#57152](https://github.com/ClickHouse/ClickHouse/pull/57152) ([Robert Schulze](https://github.com/rschu1ze)). +* The server's exit code will correspond to the exception code. For example, if the server cannot start due to memory limit, it will exit with the code 241 = MEMORY_LIMIT_EXCEEDED. In previous versions, the exit code for exceptions was always 70 = Poco::Util::ExitCode::EXIT_SOFTWARE. [#57153](https://github.com/ClickHouse/ClickHouse/pull/57153) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not demangle and symbolize stack frames from `functional` C++ header. [#57201](https://github.com/ClickHouse/ClickHouse/pull/57201) ([Mike Kot](https://github.com/myrrc)). +* HTTP server page `/dashboard` now supports charts with multiple lines. [#57236](https://github.com/ClickHouse/ClickHouse/pull/57236) ([Sergei Trifonov](https://github.com/serxa)). +* The `max_memory_usage_in_client` command line option supports a string value with a suffix (K, M, G, etc). Closes [#56879](https://github.com/ClickHouse/ClickHouse/issues/56879). [#57273](https://github.com/ClickHouse/ClickHouse/pull/57273) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Bumped Intel QPL (used by codec `DEFLATE_QPL`) from v1.2.0 to v1.3.1 . Also fixed a bug in case of BOF (Block On Fault) = 0, changed to handle page faults by falling back to SW path. [#57291](https://github.com/ClickHouse/ClickHouse/pull/57291) ([jasperzhu](https://github.com/jinjunzh)). +* Increase default `replicated_deduplication_window` of MergeTree settings from 100 to 1k. [#57335](https://github.com/ClickHouse/ClickHouse/pull/57335) ([sichenzhao](https://github.com/sichenzhao)). +* Stop using `INCONSISTENT_METADATA_FOR_BACKUP` that much. If possible prefer to continue scanning instead of stopping and starting the scanning for backup from the beginning. [#57385](https://github.com/ClickHouse/ClickHouse/pull/57385) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Build/Testing/Packaging Improvement +* Add SQLLogic test. [#56078](https://github.com/ClickHouse/ClickHouse/pull/56078) ([Han Fei](https://github.com/hanfei1991)). +* Make `clickhouse-local` and `clickhouse-client` available under short names (`ch`, `chl`, `chc`) for usability. [#56634](https://github.com/ClickHouse/ClickHouse/pull/56634) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Optimized build size further by removing unused code from external libraries. [#56786](https://github.com/ClickHouse/ClickHouse/pull/56786) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add automatic check that there are no large translation units. [#56559](https://github.com/ClickHouse/ClickHouse/pull/56559) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Lower the size of the single-binary distribution. This closes [#55181](https://github.com/ClickHouse/ClickHouse/issues/55181). [#56617](https://github.com/ClickHouse/ClickHouse/pull/56617) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Information about the sizes of every translation unit and binary file after each build will be sent to the CI database in ClickHouse Cloud. This closes [#56107](https://github.com/ClickHouse/ClickHouse/issues/56107). [#56636](https://github.com/ClickHouse/ClickHouse/pull/56636) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Certain files of "Apache Arrow" library (which we use only for non-essential things like parsing the arrow format) were rebuilt all the time regardless of the build cache. This is fixed. 
[#56657](https://github.com/ClickHouse/ClickHouse/pull/56657) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid recompiling translation units that depend on the autogenerated source file with the version. [#56660](https://github.com/ClickHouse/ClickHouse/pull/56660) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Tracing data of the linker invocations will be sent to the CI database in ClickHouse Cloud. [#56725](https://github.com/ClickHouse/ClickHouse/pull/56725) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Use DWARF 5 debug symbols for the clickhouse binary (was DWARF 4 previously). [#56770](https://github.com/ClickHouse/ClickHouse/pull/56770) ([Michael Kolupaev](https://github.com/al13n321)). +* Add a new build option `SANITIZE_COVERAGE`. If it is enabled, the code is instrumented to track the coverage. The collected information is available inside ClickHouse with: (1) a new function `coverage` that returns an array of unique addresses in the code found after the previous coverage reset; (2) `SYSTEM RESET COVERAGE` query that resets the accumulated data. This allows us to compare the coverage of different tests, including differential code coverage. Continuation of [#20539](https://github.com/ClickHouse/ClickHouse/issues/20539). [#56102](https://github.com/ClickHouse/ClickHouse/pull/56102) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Some of the stack frames might not be resolved when collecting stacks. In such cases the raw address might be helpful. [#56267](https://github.com/ClickHouse/ClickHouse/pull/56267) ([Alexander Gololobov](https://github.com/davenger)). +* Add an option to disable `libssh`. [#56333](https://github.com/ClickHouse/ClickHouse/pull/56333) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable `temporary_data_in_cache` in S3 tests in CI. [#48425](https://github.com/ClickHouse/ClickHouse/pull/48425) ([vdimir](https://github.com/vdimir)). +* Set the max memory usage for clickhouse-client (`1G`) in the CI. [#56873](https://github.com/ClickHouse/ClickHouse/pull/56873) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fix experimental Analyzer: insertion from SELECT with a subquery referencing the insertion table should process only the insertion block. [#50857](https://github.com/ClickHouse/ClickHouse/pull/50857) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix a bug in the `str_to_map` function. [#56423](https://github.com/ClickHouse/ClickHouse/pull/56423) ([Arthur Passos](https://github.com/arthurpassos)). +* Keeper `reconfig`: add timeout before yielding/taking leadership [#53481](https://github.com/ClickHouse/ClickHouse/pull/53481) ([Mike Kot](https://github.com/myrrc)). +* Fix incorrect header in grace hash join and filter pushdown [#53922](https://github.com/ClickHouse/ClickHouse/pull/53922) ([vdimir](https://github.com/vdimir)). +* Fix selecting from system tables when the table is based on a table function. [#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* RFC: Fix "Cannot find column X in source stream" for Distributed queries with LIMIT BY [#55836](https://github.com/ClickHouse/ClickHouse/pull/55836) ([Azat Khuzhin](https://github.com/azat)). +* Fix 'Cannot read from file:' while running client in a background [#55976](https://github.com/ClickHouse/ClickHouse/pull/55976) ([Kruglov Pavel](https://github.com/Avogar)).
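For the `SANITIZE_COVERAGE` build option described in the build section above, the introspection could be used roughly as follows; this is only meaningful in a binary built with that option, and is a sketch rather than an exact recipe:

```sql
-- Reset the accumulated coverage data, run something, then inspect coverage.
SYSTEM RESET COVERAGE;
SELECT 1;
SELECT length(coverage()) AS unique_addresses_hit;
```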
+* Fix clickhouse-local exit on bad send_logs_level setting [#55994](https://github.com/ClickHouse/ClickHouse/pull/55994) ([Kruglov Pavel](https://github.com/Avogar)). +* Bug fix explain ast with parameterized view [#56004](https://github.com/ClickHouse/ClickHouse/pull/56004) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix a crash during table loading on startup [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ClickHouse-sourced dictionaries with an explicit query [#56236](https://github.com/ClickHouse/ClickHouse/pull/56236) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix segfault in signal handler for Keeper [#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix incomplete query result for UNION in view() function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix inconsistency of "cast('0' as DateTime64(3))" and "cast('0' as Nullable(DateTime64(3)))" [#56286](https://github.com/ClickHouse/ClickHouse/pull/56286) ([李扬](https://github.com/taiyang-li)). +* Fix rare race condition related to Memory allocation failure [#56303](https://github.com/ClickHouse/ClickHouse/pull/56303) ([alesapin](https://github.com/alesapin)). +* Fix restore from backup with `flatten_nested` and `data_type_default_nullable` [#56306](https://github.com/ClickHouse/ClickHouse/pull/56306) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix crash in case of adding a column with type Object(JSON) [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix crash in filterPushDown [#56380](https://github.com/ClickHouse/ClickHouse/pull/56380) ([vdimir](https://github.com/vdimir)). +* Fix restore from backup with mat view and dropped source table [#56383](https://github.com/ClickHouse/ClickHouse/pull/56383) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix nullable primary key in final (2) [#56452](https://github.com/ClickHouse/ClickHouse/pull/56452) ([Amos Bird](https://github.com/amosbird)). +* Fix ON CLUSTER queries without database on initial node [#56484](https://github.com/ClickHouse/ClickHouse/pull/56484) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix startup failure due to TTL dependency [#56489](https://github.com/ClickHouse/ClickHouse/pull/56489) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ALTER COMMENT queries ON CLUSTER [#56491](https://github.com/ClickHouse/ClickHouse/pull/56491) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ALTER COLUMN with ALIAS [#56493](https://github.com/ClickHouse/ClickHouse/pull/56493) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix empty NAMED COLLECTIONs [#56494](https://github.com/ClickHouse/ClickHouse/pull/56494) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix two cases of projection analysis. [#56502](https://github.com/ClickHouse/ClickHouse/pull/56502) ([Amos Bird](https://github.com/amosbird)). 
+* Fix handling of aliases in query cache [#56545](https://github.com/ClickHouse/ClickHouse/pull/56545) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix conversion from `Nullable(Enum)` to `Nullable(String)` [#56644](https://github.com/ClickHouse/ClickHouse/pull/56644) ([Nikolay Degterinsky](https://github.com/evillique)). +* More reliable log handling in Keeper [#56670](https://github.com/ClickHouse/ClickHouse/pull/56670) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix configuration merge for nodes with substitution attributes [#56694](https://github.com/ClickHouse/ClickHouse/pull/56694) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Fix duplicate usage of table function input(). [#56695](https://github.com/ClickHouse/ClickHouse/pull/56695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix: RabbitMQ OpenSSL dynamic loading issue [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix crash in GCD codec in case when zeros present in data [#56704](https://github.com/ClickHouse/ClickHouse/pull/56704) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix 'mutex lock failed: Invalid argument' in clickhouse-local during insert into function [#56710](https://github.com/ClickHouse/ClickHouse/pull/56710) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix Date text parsing in optimistic path [#56765](https://github.com/ClickHouse/ClickHouse/pull/56765) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* DatabaseReplicated: fix DDL query timeout after recovering a replica [#56796](https://github.com/ClickHouse/ClickHouse/pull/56796) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix incorrect nullable columns reporting in MySQL binary protocol [#56799](https://github.com/ClickHouse/ClickHouse/pull/56799) ([Serge Klochkov](https://github.com/slvrtrn)). +* Support Iceberg metadata files for metastore tables [#56810](https://github.com/ClickHouse/ClickHouse/pull/56810) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix TSAN report under transform [#56817](https://github.com/ClickHouse/ClickHouse/pull/56817) ([Raúl Marín](https://github.com/Algunenano)). +* Fix SET query and SETTINGS formatting [#56825](https://github.com/ClickHouse/ClickHouse/pull/56825) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix failure to start due to table dependency in joinGet [#56828](https://github.com/ClickHouse/ClickHouse/pull/56828) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix flattening existing Nested columns during ADD COLUMN [#56830](https://github.com/ClickHouse/ClickHouse/pull/56830) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix allow cr end of line for csv [#56901](https://github.com/ClickHouse/ClickHouse/pull/56901) ([KevinyhZou](https://github.com/KevinyhZou)). +* Fix `tryBase64Decode` with invalid input [#56913](https://github.com/ClickHouse/ClickHouse/pull/56913) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix generating deep nested columns in CapnProto/Protobuf schemas [#56941](https://github.com/ClickHouse/ClickHouse/pull/56941) ([Kruglov Pavel](https://github.com/Avogar)). +* Prevent incompatible ALTER of projection columns [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)). 
+* Fix sqlite file path validation [#56984](https://github.com/ClickHouse/ClickHouse/pull/56984) ([San](https://github.com/santrancisco)). +* S3Queue: fix metadata reference increment [#56990](https://github.com/ClickHouse/ClickHouse/pull/56990) ([Kseniia Sumarokova](https://github.com/kssenii)). +* S3Queue minor fix [#56999](https://github.com/ClickHouse/ClickHouse/pull/56999) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix file path validation for DatabaseFileSystem [#57029](https://github.com/ClickHouse/ClickHouse/pull/57029) ([San](https://github.com/santrancisco)). +* Fix `fuzzBits` with `ARRAY JOIN` [#57033](https://github.com/ClickHouse/ClickHouse/pull/57033) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix Nullptr dereference in partial merge join with joined_subquery_re… [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). +* Fix race condition in RemoteSource [#57052](https://github.com/ClickHouse/ClickHouse/pull/57052) ([Raúl Marín](https://github.com/Algunenano)). +* Implement `bitHammingDistance` for big integers [#57073](https://github.com/ClickHouse/ClickHouse/pull/57073) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* S3-style links bug fix [#57075](https://github.com/ClickHouse/ClickHouse/pull/57075) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix JSON_QUERY function with multiple numeric paths [#57096](https://github.com/ClickHouse/ClickHouse/pull/57096) ([KevinyhZou](https://github.com/KevinyhZou)). +* Fix buffer overflow in Gorilla codec [#57107](https://github.com/ClickHouse/ClickHouse/pull/57107) ([Nikolay Degterinsky](https://github.com/evillique)). +* Close interserver connection on any exception before authentication [#57142](https://github.com/ClickHouse/ClickHouse/pull/57142) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix incorrect JOIN plan optimization with partially materialized normal projection [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)). +* Ignore comments when comparing column descriptions [#57259](https://github.com/ClickHouse/ClickHouse/pull/57259) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `ReadonlyReplica` metric for all cases [#57267](https://github.com/ClickHouse/ClickHouse/pull/57267) ([Antonio Andelic](https://github.com/antonio2368)). +* Background merges correctly use temporary data storage in the cache [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)). +* Keeper fix for changelog and snapshots [#57299](https://github.com/ClickHouse/ClickHouse/pull/57299) ([Antonio Andelic](https://github.com/antonio2368)). +* Ignore finished ON CLUSTER tasks if hostname changed [#57339](https://github.com/ClickHouse/ClickHouse/pull/57339) ([Alexander Tokmakov](https://github.com/tavplubix)). +* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)). +* FS cache: add a limit for background download [#57424](https://github.com/ClickHouse/ClickHouse/pull/57424) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+ + ### ClickHouse release 23.10, 2023-11-02 #### Backward Incompatible Change * There is no longer an option to automatically remove broken data parts. This closes [#55174](https://github.com/ClickHouse/ClickHouse/issues/55174). [#55184](https://github.com/ClickHouse/ClickHouse/pull/55184) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#55557](https://github.com/ClickHouse/ClickHouse/pull/55557) ([Jihyuk Bok](https://github.com/tomahawk28)). @@ -39,7 +391,7 @@ * Allow to drop cache for Protobuf format with `SYSTEM DROP SCHEMA FORMAT CACHE [FOR Protobuf]`. [#55064](https://github.com/ClickHouse/ClickHouse/pull/55064) ([Aleksandr Musorin](https://github.com/AVMusorin)). * Add external HTTP Basic authenticator. [#55199](https://github.com/ClickHouse/ClickHouse/pull/55199) ([Aleksei Filatov](https://github.com/aalexfvk)). * Added function `byteSwap` which reverses the bytes of unsigned integers. This is particularly useful for reversing values of types which are represented as unsigned integers internally such as IPv4. [#55211](https://github.com/ClickHouse/ClickHouse/pull/55211) ([Priyansh Agrawal](https://github.com/Priyansh121096)). -* Added function `formatQuery()` which returns a formatted version (possibly spanning multiple lines) of a SQL query string. Also added function `formatQuerySingleLine()` which does the same but the returned string will not contain linebreaks. [#55239](https://github.com/ClickHouse/ClickHouse/pull/55239) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Added function `formatQuery` which returns a formatted version (possibly spanning multiple lines) of a SQL query string. Also added function `formatQuerySingleLine` which does the same but the returned string will not contain linebreaks. [#55239](https://github.com/ClickHouse/ClickHouse/pull/55239) ([Salvatore Mesoraca](https://github.com/aiven-sal)). * Added `DWARF` input format that reads debug symbols from an ELF executable/library/object file. [#55450](https://github.com/ClickHouse/ClickHouse/pull/55450) ([Michael Kolupaev](https://github.com/al13n321)). * Allow to save unparsed records and errors in RabbitMQ, NATS and FileLog engines. Add virtual columns `_error` and `_raw_message` (for NATS and RabbitMQ), `_raw_record` (for FileLog) that are filled when ClickHouse fails to parse a new record. The behaviour is controlled by the storage settings `nats_handle_error_mode` for NATS, `rabbitmq_handle_error_mode` for RabbitMQ, and `handle_error_mode` for FileLog, similar to `kafka_handle_error_mode`. If it's set to `default`, an exception will be thrown when ClickHouse fails to parse a record; if it's set to `stream`, the error and raw record will be saved into virtual columns. Closes [#36035](https://github.com/ClickHouse/ClickHouse/issues/36035). [#55477](https://github.com/ClickHouse/ClickHouse/pull/55477) ([Kruglov Pavel](https://github.com/Avogar)). * Keeper client improvement: add the `get_all_children_number` command that returns the number of all children nodes under a specific path. [#55485](https://github.com/ClickHouse/ClickHouse/pull/55485) ([guoxiaolong](https://github.com/guoxiaolongzte)). @@ -74,11 +426,11 @@ * Reduced memory consumption during loading of hierarchical dictionaries. [#55838](https://github.com/ClickHouse/ClickHouse/pull/55838) ([Nikita Taranov](https://github.com/nickitat)). * All dictionaries support setting `dictionary_use_async_executor`. [#55839](https://github.com/ClickHouse/ClickHouse/pull/55839) ([vdimir](https://github.com/vdimir)).
* Prevent excessive memory usage when deserializing AggregateFunctionTopKGenericData. [#55947](https://github.com/ClickHouse/ClickHouse/pull/55947) ([Raúl Marín](https://github.com/Algunenano)). -* On a Keeper with lots of watches AsyncMetrics threads can consume 100% of CPU for noticeable time in `DB::KeeperStorage::getSessionsWithWatchesCount()`. The fix is to avoid traversing heavy `watches` and `list_watches` sets. [#56054](https://github.com/ClickHouse/ClickHouse/pull/56054) ([Alexander Gololobov](https://github.com/davenger)). -* Add setting `optimize_trivial_approximate_count_query` to use `count()` approximation for storage EmbeddedRocksDB. Enable trivial count for StorageJoin. [#55806](https://github.com/ClickHouse/ClickHouse/pull/55806) ([Duc Canh Le](https://github.com/canhld94)). +* On a Keeper with lots of watches AsyncMetrics threads can consume 100% of CPU for noticeable time in `DB::KeeperStorage::getSessionsWithWatchesCount`. The fix is to avoid traversing heavy `watches` and `list_watches` sets. [#56054](https://github.com/ClickHouse/ClickHouse/pull/56054) ([Alexander Gololobov](https://github.com/davenger)). +* Add setting `optimize_trivial_approximate_count_query` to use `count` approximation for storage EmbeddedRocksDB. Enable trivial count for StorageJoin. [#55806](https://github.com/ClickHouse/ClickHouse/pull/55806) ([Duc Canh Le](https://github.com/canhld94)). #### Improvement -* Functions `toDayOfWeek()` (MySQL alias: `DAYOFWEEK()`), `toYearWeek()` (`YEARWEEK()`) and `toWeek()` (`WEEK()`) now support `String` arguments. This makes their behavior consistent with MySQL's behavior. [#55589](https://github.com/ClickHouse/ClickHouse/pull/55589) ([Robert Schulze](https://github.com/rschu1ze)). +* Functions `toDayOfWeek` (MySQL alias: `DAYOFWEEK`), `toYearWeek` (`YEARWEEK`) and `toWeek` (`WEEK`) now support `String` arguments. This makes their behavior consistent with MySQL's behavior. [#55589](https://github.com/ClickHouse/ClickHouse/pull/55589) ([Robert Schulze](https://github.com/rschu1ze)). * Introduced setting `date_time_overflow_behavior` with possible values `ignore`, `throw`, `saturate` that controls the overflow behavior when converting from Date, Date32, DateTime64, Integer or Float to Date, Date32, DateTime or DateTime64. [#55696](https://github.com/ClickHouse/ClickHouse/pull/55696) ([Andrey Zvonov](https://github.com/zvonand)). * Implement query parameters support for `ALTER TABLE ... ACTION PARTITION [ID] {parameter_name:ParameterType}`. Merges [#49516](https://github.com/ClickHouse/ClickHouse/issues/49516). Closes [#49449](https://github.com/ClickHouse/ClickHouse/issues/49449). [#55604](https://github.com/ClickHouse/ClickHouse/pull/55604) ([alesapin](https://github.com/alesapin)). * Print processor ids in a prettier manner in EXPLAIN. [#48852](https://github.com/ClickHouse/ClickHouse/pull/48852) ([Vlad Seliverstov](https://github.com/behebot)). @@ -112,7 +464,7 @@ * Functions `(add|subtract)(Year|Quarter|Month|Week|Day|Hour|Minute|Second|Millisecond|Microsecond|Nanosecond)` now support string-encoded date arguments, e.g. `SELECT addDays('2023-10-22', 1)`. This increases compatibility with MySQL and is needed by Tableau Online. [#55869](https://github.com/ClickHouse/ClickHouse/pull/55869) ([Robert Schulze](https://github.com/rschu1ze)). * The setting `apply_deleted_mask`, when disabled, allows reading rows that were marked as deleted by lightweight DELETE queries. This is useful for debugging.
[#55952](https://github.com/ClickHouse/ClickHouse/pull/55952) ([Alexander Gololobov](https://github.com/davenger)). * Allow skipping `null` values when serializing Tuple to JSON objects, which makes it possible to keep compatibility with Spark's `to_json` function and is also useful for Gluten. [#55956](https://github.com/ClickHouse/ClickHouse/pull/55956) ([李扬](https://github.com/taiyang-li)). -* Functions `(add|sub)Date()` now support string-encoded date arguments, e.g. `SELECT addDate('2023-10-22 11:12:13', INTERVAL 5 MINUTE)`. The same support for string-encoded date arguments is added to the plus and minus operators, e.g. `SELECT '2023-10-23' + INTERVAL 1 DAY`. This increases compatibility with MySQL and is needed by Tableau Online. [#55960](https://github.com/ClickHouse/ClickHouse/pull/55960) ([Robert Schulze](https://github.com/rschu1ze)). +* Functions `(add|sub)Date` now support string-encoded date arguments, e.g. `SELECT addDate('2023-10-22 11:12:13', INTERVAL 5 MINUTE)`. The same support for string-encoded date arguments is added to the plus and minus operators, e.g. `SELECT '2023-10-23' + INTERVAL 1 DAY`. This increases compatibility with MySQL and is needed by Tableau Online. [#55960](https://github.com/ClickHouse/ClickHouse/pull/55960) ([Robert Schulze](https://github.com/rschu1ze)). * Allow unquoted strings with CR (`\r`) in CSV format. Closes [#39930](https://github.com/ClickHouse/ClickHouse/issues/39930). [#56046](https://github.com/ClickHouse/ClickHouse/pull/56046) ([Kruglov Pavel](https://github.com/Avogar)). * Allow to run `clickhouse-keeper` using embedded config. [#56086](https://github.com/ClickHouse/ClickHouse/pull/56086) ([Maksim Kita](https://github.com/kitaisreal)). * Set a limit on the maximum configuration value for `queued.min.messages` to avoid a problem with starting to fetch data from Kafka. [#56121](https://github.com/ClickHouse/ClickHouse/pull/56121) ([Stas Morozov](https://github.com/r3b-fish)). @@ -133,7 +485,7 @@ * Fixed a bug where the `match` function (regex) with a pattern containing alternation produced an incorrect key condition. Closes #53222. [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). * Fix 'Cannot find column' in read-in-order optimization with ARRAY JOIN [#51746](https://github.com/ClickHouse/ClickHouse/pull/51746) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Support missed experimental `Object(Nullable(json))` subcolumns in query. [#54052](https://github.com/ClickHouse/ClickHouse/pull/54052) ([zps](https://github.com/VanDarkholme7)). -* Re-add fix for `accurateCastOrNull()` [#54629](https://github.com/ClickHouse/ClickHouse/pull/54629) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Re-add fix for `accurateCastOrNull` [#54629](https://github.com/ClickHouse/ClickHouse/pull/54629) ([Salvatore Mesoraca](https://github.com/aiven-sal)). * Fix detecting `DEFAULT` for columns of a Distributed table created without AS [#55060](https://github.com/ClickHouse/ClickHouse/pull/55060) ([Vitaly Baranov](https://github.com/vitlibar)). * Proper cleanup in case of exception in ctor of ShellCommandSource [#55103](https://github.com/ClickHouse/ClickHouse/pull/55103) ([Alexander Gololobov](https://github.com/davenger)). * Fix deadlock in LDAP assigned role update [#55119](https://github.com/ClickHouse/ClickHouse/pull/55119) ([Julian Maicher](https://github.com/jmaicher)).
@@ -191,7 +543,7 @@ * Add error handler to odbc-bridge [#56185](https://github.com/ClickHouse/ClickHouse/pull/56185) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -### ClickHouse release 23.9, 2023-09-28 +### ClickHouse release 23.9, 2023-09-28 #### Backward Incompatible Change * Remove the `status_info` configuration option and dictionaries status from the default Prometheus handler. [#54090](https://github.com/ClickHouse/ClickHouse/pull/54090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -213,7 +565,7 @@ * Add function `decodeHTMLComponent`. [#54097](https://github.com/ClickHouse/ClickHouse/pull/54097) ([Bharat Nallan](https://github.com/bharatnc)). * Added `peak_threads_usage` to query_log table. [#54335](https://github.com/ClickHouse/ClickHouse/pull/54335) ([Alexey Gerasimchuck](https://github.com/Demilivor)). * Add `SHOW FUNCTIONS` support to clickhouse-client. [#54337](https://github.com/ClickHouse/ClickHouse/pull/54337) ([Julia Kartseva](https://github.com/wat-ze-hex)). -* Added function `toDaysSinceYearZero` with alias `TO_DAYS` (for compatibility with MySQL) which returns the number of days passed since `0001-01-01` (in Proleptic Gregorian Calendar). [#54479](https://github.com/ClickHouse/ClickHouse/pull/54479) ([Robert Schulze](https://github.com/rschu1ze)). Function `toDaysSinceYearZero()` now supports arguments of type `DateTime` and `DateTime64`. [#54856](https://github.com/ClickHouse/ClickHouse/pull/54856) ([Serge Klochkov](https://github.com/slvrtrn)). +* Added function `toDaysSinceYearZero` with alias `TO_DAYS` (for compatibility with MySQL) which returns the number of days passed since `0001-01-01` (in Proleptic Gregorian Calendar). [#54479](https://github.com/ClickHouse/ClickHouse/pull/54479) ([Robert Schulze](https://github.com/rschu1ze)). Function `toDaysSinceYearZero` now supports arguments of type `DateTime` and `DateTime64`. [#54856](https://github.com/ClickHouse/ClickHouse/pull/54856) ([Serge Klochkov](https://github.com/slvrtrn)). * Added functions `YYYYMMDDtoDate`, `YYYYMMDDtoDate32`, `YYYYMMDDhhmmssToDateTime` and `YYYYMMDDhhmmssToDateTime64`. They convert a date or date with time encoded as integer (e.g. 20230911) into a native date or date with time. As such, they provide the opposite functionality of existing functions `YYYYMMDDToDate`, `YYYYMMDDToDateTime`, `YYYYMMDDhhmmddToDateTime`, `YYYYMMDDhhmmddToDateTime64`. [#54509](https://github.com/ClickHouse/ClickHouse/pull/54509) ([Quanfa Fu](https://github.com/dentiscalprum)) ([Robert Schulze](https://github.com/rschu1ze)). * Add several string distance functions, including `byteHammingDistance`, `editDistance`. [#54935](https://github.com/ClickHouse/ClickHouse/pull/54935) ([flynn](https://github.com/ucasfl)). * Allow specifying the expiration date and, optionally, the time for user credentials with `VALID UNTIL datetime` clause. [#51261](https://github.com/ClickHouse/ClickHouse/pull/51261) ([Nikolay Degterinsky](https://github.com/evillique)). @@ -229,7 +581,7 @@ * An optimization to rewrite `COUNT(DISTINCT ...)` and various `uniq` variants to `count` if it is selected from a subquery with GROUP BY. [#52082](https://github.com/ClickHouse/ClickHouse/pull/52082) [#52645](https://github.com/ClickHouse/ClickHouse/pull/52645) ([JackyWoo](https://github.com/JackyWoo)). * Remove manual calls to `mmap/mremap/munmap` and delegate all this work to `jemalloc` - and it slightly improves performance. 
[#52792](https://github.com/ClickHouse/ClickHouse/pull/52792) ([Nikita Taranov](https://github.com/nickitat)). * Fixed high CPU consumption when working with NATS. [#54399](https://github.com/ClickHouse/ClickHouse/pull/54399) ([Vasilev Pyotr](https://github.com/vahpetr)). -* Since we use separate instructions for executing `toString()` with a datetime argument, it is possible to improve performance a bit for non-datetime arguments and have some parts of the code cleaner. Follows up [#53680](https://github.com/ClickHouse/ClickHouse/issues/53680). [#54443](https://github.com/ClickHouse/ClickHouse/pull/54443) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Since we use separate instructions for executing `toString` with a datetime argument, it is possible to improve performance a bit for non-datetime arguments and have some parts of the code cleaner. Follows up [#53680](https://github.com/ClickHouse/ClickHouse/issues/53680). [#54443](https://github.com/ClickHouse/ClickHouse/pull/54443) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). * Instead of serializing JSON elements into a `std::stringstream`, this PR tries to put the serialization result into `ColumnString` directly. [#54613](https://github.com/ClickHouse/ClickHouse/pull/54613) ([lgbo](https://github.com/lgbo-ustc)). * Enable ORDER BY optimization for reading data in corresponding order from a MergeTree table in case the table is behind a view. [#54628](https://github.com/ClickHouse/ClickHouse/pull/54628) ([Vitaly Baranov](https://github.com/vitlibar)). * Improve JSON SQL functions by reusing `GeneratorJSONPath` and removing several shared pointers. [#54735](https://github.com/ClickHouse/ClickHouse/pull/54735) ([lgbo](https://github.com/lgbo-ustc)). @@ -479,7 +831,7 @@ * The `domainRFC` function now supports IPv6 in square brackets. [#53506](https://github.com/ClickHouse/ClickHouse/pull/53506) ([Chen768959](https://github.com/Chen768959)). * Use longer timeout for S3 CopyObject requests, which are used in backups. [#53533](https://github.com/ClickHouse/ClickHouse/pull/53533) ([Michael Kolupaev](https://github.com/al13n321)). * Added server setting `aggregate_function_group_array_max_element_size`. This setting is used to limit array size for `groupArray` function at serialization. The default value is `16777215`. [#53550](https://github.com/ClickHouse/ClickHouse/pull/53550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* `SCHEMA()` was added as an alias for `DATABASE()` to improve MySQL compatibility. [#53587](https://github.com/ClickHouse/ClickHouse/pull/53587) ([Daniël van Eeden](https://github.com/dveeden)). +* `SCHEMA` was added as an alias for `DATABASE` to improve MySQL compatibility. [#53587](https://github.com/ClickHouse/ClickHouse/pull/53587) ([Daniël van Eeden](https://github.com/dveeden)). * Add asynchronous metrics about tables in the system database. For example, `TotalBytesOfMergeTreeTablesSystem`. This closes [#53603](https://github.com/ClickHouse/ClickHouse/issues/53603). [#53604](https://github.com/ClickHouse/ClickHouse/pull/53604) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * SQL editor in the Play UI and Dashboard will not use Grammarly. [#53614](https://github.com/ClickHouse/ClickHouse/pull/53614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * As expert-level settings, it is now possible to (1) configure the size_ratio (i.e.
the relative size of the protected queue) of the [index] mark/uncompressed caches, (2) configure the cache policy of the index mark and index uncompressed caches. [#53657](https://github.com/ClickHouse/ClickHouse/pull/53657) ([Robert Schulze](https://github.com/rschu1ze)). @@ -741,7 +1093,7 @@ * Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). * Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)). * Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)). -* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fix `countSubstrings` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). * Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)). * Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)). * Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). @@ -1585,7 +1937,7 @@ * A couple of segfaults have been reported around `c-ares`. They were introduced in my previous pull requests. I have fixed them with the help of Alexander Tokmakov. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). * Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)). * Set compression method and level for backup. Closes [#45690](https://github.com/ClickHouse/ClickHouse/issues/45690). [#45737](https://github.com/ClickHouse/ClickHouse/pull/45737) ([Pradeep Chhetri](https://github.com/chhetripradeep)). -* Should use `select_query_typed.limitByOffset()` instead of `select_query_typed.limitOffset()`. [#45817](https://github.com/ClickHouse/ClickHouse/pull/45817) ([刘陶峰](https://github.com/taofengliu)). +* Should use `select_query_typed.limitByOffset` instead of `select_query_typed.limitOffset`. [#45817](https://github.com/ClickHouse/ClickHouse/pull/45817) ([刘陶峰](https://github.com/taofengliu)). * When using the experimental analyzer, queries like `SELECT number FROM numbers(100) LIMIT 10 OFFSET 10;` get wrong results (an empty result for this SQL). That is caused by an unnecessary offset step added by the planner. [#45822](https://github.com/ClickHouse/ClickHouse/pull/45822) ([刘陶峰](https://github.com/taofengliu)). * Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expression. [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). * Bugfix: IPv6 parser for a mixed IPv4 address with a missing first octet (like `::.1.2.3`). [#45871](https://github.com/ClickHouse/ClickHouse/pull/45871) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
diff --git a/LICENSE b/LICENSE index 65c5df824c6..c653e59a8f3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright 2016-2023 ClickHouse, Inc. +Copyright 2016-2024 ClickHouse, Inc. Apache License Version 2.0, January 2004 @@ -188,7 +188,7 @@ Copyright 2016-2023 ClickHouse, Inc. same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2016-2023 ClickHouse, Inc. + Copyright 2016-2024 ClickHouse, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index bf8ef0b4e98..c56b3c2fd0d 100644 --- a/README.md +++ b/README.md @@ -33,12 +33,7 @@ curl https://clickhouse.com/ | sh ## Upcoming Events -* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30 -* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11 -* [**ClickHouse Meetup in Sydney**](https://www.meetup.com/clickhouse-sydney-user-group/events/297638812/) - Dec 12 -* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/296488840/) - Dec 12 - -Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com. +Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com. ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" diff --git a/SECURITY.md b/SECURITY.md index 5477628cee4..a200e172a3b 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,8 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 23.12 | ✔️ | +| 23.11 | ✔️ | | 23.10 | ✔️ | -| 23.9 | ✔️ | +| 23.9 | ❌ | | 23.8 | ✔️ | | 23.7 | ❌ | | 23.6 | ❌ | diff --git a/base/glibc-compatibility/glibc-compatibility.c b/base/glibc-compatibility/glibc-compatibility.c index 49bb81a58be..738cda47877 100644 --- a/base/glibc-compatibility/glibc-compatibility.c +++ b/base/glibc-compatibility/glibc-compatibility.c @@ -30,7 +30,6 @@ int __gai_sigqueue(int sig, const union sigval val, pid_t caller_pid) } -#include #include #include diff --git a/base/poco/Foundation/CMakeLists.txt b/base/poco/Foundation/CMakeLists.txt index d0dde8a51a5..dfb41a33fb1 100644 --- a/base/poco/Foundation/CMakeLists.txt +++ b/base/poco/Foundation/CMakeLists.txt @@ -55,7 +55,6 @@ set (SRCS src/DigestStream.cpp src/DirectoryIterator.cpp src/DirectoryIteratorStrategy.cpp - src/DirectoryWatcher.cpp src/Environment.cpp src/Error.cpp src/ErrorHandler.cpp diff --git a/base/poco/Foundation/include/Poco/DirectoryWatcher.h b/base/poco/Foundation/include/Poco/DirectoryWatcher.h deleted file mode 100644 index 00964a5512a..00000000000 --- a/base/poco/Foundation/include/Poco/DirectoryWatcher.h +++ /dev/null @@ -1,228 +0,0 @@ -// -// DirectoryWatcher.h -// -// Library: Foundation -// Package: Filesystem -// Module: DirectoryWatcher -// -// Definition of the DirectoryWatcher class. 
-// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_DirectoryWatcher_INCLUDED -#define Foundation_DirectoryWatcher_INCLUDED - - -#include "Poco/Foundation.h" - - -#ifndef POCO_NO_INOTIFY - - -# include "Poco/AtomicCounter.h" -# include "Poco/BasicEvent.h" -# include "Poco/File.h" -# include "Poco/Runnable.h" -# include "Poco/Thread.h" - - -namespace Poco -{ - - -class DirectoryWatcherStrategy; - - -class Foundation_API DirectoryWatcher : protected Runnable -/// This class is used to get notifications about changes -/// to the filesystem, more specifically, to a specific -/// directory. Changes to a directory are reported via -/// events. -/// -/// A thread will be created that watches the specified -/// directory for changes. Events are reported in the context -/// of this thread. -/// -/// Note that changes to files in subdirectories of the watched -/// directory are not reported. Separate DirectoryWatcher objects -/// must be created for these directories if they should be watched. -/// -/// Changes to file attributes are not reported. -/// -/// On Windows, this class is implemented using FindFirstChangeNotification()/FindNextChangeNotification(). -/// On Linux, this class is implemented using inotify. -/// On FreeBSD and Darwin (Mac OS X, iOS), this class uses kevent/kqueue. -/// On all other platforms, the watched directory is periodically scanned -/// for changes. This can negatively affect performance if done too often. -/// Therefore, the interval in which scans are done can be specified in -/// the constructor. Note that periodic scanning will also be done on FreeBSD -/// and Darwin if events for changes to files (DW_ITEM_MODIFIED) are enabled. -/// -/// DW_ITEM_MOVED_FROM and DW_ITEM_MOVED_TO events will only be reported -/// on Linux. On other platforms, a file rename or move operation -/// will be reported via a DW_ITEM_REMOVED and a DW_ITEM_ADDED event. -/// The order of these two events is not defined. -/// -/// An event mask can be specified to enable only certain events. -{ -public: - enum DirectoryEventType - { - DW_ITEM_ADDED = 1, - /// A new item has been created and added to the directory. - - DW_ITEM_REMOVED = 2, - /// An item has been removed from the directory. - - DW_ITEM_MODIFIED = 4, - /// An item has been modified. - - DW_ITEM_MOVED_FROM = 8, - /// An item has been renamed or moved. This event delivers the old name. - - DW_ITEM_MOVED_TO = 16 - /// An item has been renamed or moved. This event delivers the new name. - }; - - enum DirectoryEventMask - { - DW_FILTER_ENABLE_ALL = 31, - /// Enables all event types. - - DW_FILTER_DISABLE_ALL = 0 - /// Disables all event types. - }; - - enum - { - DW_DEFAULT_SCAN_INTERVAL = 5 /// Default scan interval for platforms that don't provide a native notification mechanism. - }; - - struct DirectoryEvent - { - DirectoryEvent(const File & f, DirectoryEventType ev) : item(f), event(ev) { } - - const File & item; /// The directory or file that has been changed. - DirectoryEventType event; /// The kind of event. - }; - - BasicEvent itemAdded; - /// Fired when a file or directory has been created or added to the directory. - - BasicEvent itemRemoved; - /// Fired when a file or directory has been removed from the directory. - - BasicEvent itemModified; - /// Fired when a file or directory has been modified. - - BasicEvent itemMovedFrom; - /// Fired when a file or directory has been renamed. 
This event delivers the old name. - - BasicEvent itemMovedTo; - /// Fired when a file or directory has been moved. This event delivers the new name. - - BasicEvent scanError; - /// Fired when an error occurs while scanning for changes. - - DirectoryWatcher(const std::string & path, int eventMask = DW_FILTER_ENABLE_ALL, int scanInterval = DW_DEFAULT_SCAN_INTERVAL); - /// Creates a DirectoryWatcher for the directory given in path. - /// To enable only specific events, an eventMask can be specified by - /// OR-ing the desired event IDs (e.g., DW_ITEM_ADDED | DW_ITEM_MODIFIED). - /// On platforms where no native filesystem notifications are available, - /// scanInterval specifies the interval in seconds between scans - /// of the directory. - - DirectoryWatcher(const File & directory, int eventMask = DW_FILTER_ENABLE_ALL, int scanInterval = DW_DEFAULT_SCAN_INTERVAL); - /// Creates a DirectoryWatcher for the specified directory - /// To enable only specific events, an eventMask can be specified by - /// OR-ing the desired event IDs (e.g., DW_ITEM_ADDED | DW_ITEM_MODIFIED). - /// On platforms where no native filesystem notifications are available, - /// scanInterval specifies the interval in seconds between scans - /// of the directory. - - ~DirectoryWatcher(); - /// Destroys the DirectoryWatcher. - - void suspendEvents(); - /// Suspends sending of events. Can be called multiple times, but every - /// call to suspendEvent() must be matched by a call to resumeEvents(). - - void resumeEvents(); - /// Resumes events, after they have been suspended with a call to suspendEvents(). - - bool eventsSuspended() const; - /// Returns true iff events are suspended. - - int eventMask() const; - /// Returns the value of the eventMask passed to the constructor. - - int scanInterval() const; - /// Returns the scan interval in seconds. - - const File & directory() const; - /// Returns the directory being watched. - - bool supportsMoveEvents() const; - /// Returns true iff the platform supports DW_ITEM_MOVED_FROM/itemMovedFrom and - /// DW_ITEM_MOVED_TO/itemMovedTo events. - -protected: - void init(); - void stop(); - void run(); - -private: - DirectoryWatcher(); - DirectoryWatcher(const DirectoryWatcher &); - DirectoryWatcher & operator=(const DirectoryWatcher &); - - Thread _thread; - File _directory; - int _eventMask; - AtomicCounter _eventsSuspended; - int _scanInterval; - DirectoryWatcherStrategy * _pStrategy; -}; - - -// -// inlines -// - - -inline bool DirectoryWatcher::eventsSuspended() const -{ - return _eventsSuspended.value() > 0; -} - - -inline int DirectoryWatcher::eventMask() const -{ - return _eventMask; -} - - -inline int DirectoryWatcher::scanInterval() const -{ - return _scanInterval; -} - - -inline const File & DirectoryWatcher::directory() const -{ - return _directory; -} - - -} // namespace Poco - - -#endif // POCO_NO_INOTIFY - - -#endif // Foundation_DirectoryWatcher_INCLUDED diff --git a/base/poco/Foundation/include/Poco/StreamUtil.h b/base/poco/Foundation/include/Poco/StreamUtil.h index fa1814a0f2e..ed0a4fb5154 100644 --- a/base/poco/Foundation/include/Poco/StreamUtil.h +++ b/base/poco/Foundation/include/Poco/StreamUtil.h @@ -69,6 +69,9 @@ // init() is called in the MyIOS constructor. // Therefore we replace each call to init() with // the poco_ios_init macro defined below. +// +// Also this macro will adjust exceptions() flags, since by default std::ios +// will hide exceptions, while in ClickHouse it is better to pass them through. 
#if !defined(POCO_IOS_INIT_HACK) @@ -79,7 +82,10 @@ #if defined(POCO_IOS_INIT_HACK) # define poco_ios_init(buf) #else -# define poco_ios_init(buf) init(buf) +# define poco_ios_init(buf) do { \ + init(buf); \ + this->exceptions(std::ios::failbit | std::ios::badbit); \ +} while (0) #endif diff --git a/base/poco/Foundation/include/Poco/UTF32Encoding.h b/base/poco/Foundation/include/Poco/UTF32Encoding.h index e6784e787cc..dafac005e83 100644 --- a/base/poco/Foundation/include/Poco/UTF32Encoding.h +++ b/base/poco/Foundation/include/Poco/UTF32Encoding.h @@ -70,6 +70,15 @@ public: int queryConvert(const unsigned char * bytes, int length) const; int sequenceLength(const unsigned char * bytes, int length) const; +protected: + static int safeToInt(Poco::UInt32 value) + { + if (value <= 0x10FFFF) + return static_cast(value); + else + return -1; + } + private: bool _flipBytes; static const char * _names[]; diff --git a/base/poco/Foundation/src/DirectoryWatcher.cpp b/base/poco/Foundation/src/DirectoryWatcher.cpp deleted file mode 100644 index b559da65e09..00000000000 --- a/base/poco/Foundation/src/DirectoryWatcher.cpp +++ /dev/null @@ -1,602 +0,0 @@ -// -// DirectoryWatcher.cpp -// -// Library: Foundation -// Package: Filesystem -// Module: DirectoryWatcher -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/DirectoryWatcher.h" - - -#ifndef POCO_NO_INOTIFY - - -#include "Poco/Path.h" -#include "Poco/Glob.h" -#include "Poco/DirectoryIterator.h" -#include "Poco/Event.h" -#include "Poco/Exception.h" -#include "Poco/Buffer.h" -#if POCO_OS == POCO_OS_LINUX || POCO_OS == POCO_OS_ANDROID - #include - #include - #include -#elif POCO_OS == POCO_OS_MAC_OS_X || POCO_OS == POCO_OS_FREE_BSD - #include - #include - #include - #include - #include - #if (POCO_OS == POCO_OS_FREE_BSD) && !defined(O_EVTONLY) - #define O_EVTONLY 0x8000 - #endif -#endif -#include -#include -#include - -namespace Poco { - - -class DirectoryWatcherStrategy -{ -public: - DirectoryWatcherStrategy(DirectoryWatcher& owner): - _owner(owner) - { - } - - virtual ~DirectoryWatcherStrategy() - { - } - - DirectoryWatcher& owner() - { - return _owner; - } - - virtual void run() = 0; - virtual void stop() = 0; - virtual bool supportsMoveEvents() const = 0; - -protected: - struct ItemInfo - { - ItemInfo(): - size(0) - { - } - - ItemInfo(const ItemInfo& other): - path(other.path), - size(other.size), - lastModified(other.lastModified) - { - } - - explicit ItemInfo(const File& f): - path(f.path()), - size(f.isFile() ? 
f.getSize() : 0), - lastModified(f.getLastModified()) - { - } - - std::string path; - File::FileSize size; - Timestamp lastModified; - }; - typedef std::map ItemInfoMap; - - void scan(ItemInfoMap& entries) - { - DirectoryIterator it(owner().directory()); - DirectoryIterator end; - while (it != end) - { - entries[it.path().getFileName()] = ItemInfo(*it); - ++it; - } - } - - void compare(ItemInfoMap& oldEntries, ItemInfoMap& newEntries) - { - for (ItemInfoMap::iterator itn = newEntries.begin(); itn != newEntries.end(); ++itn) - { - ItemInfoMap::iterator ito = oldEntries.find(itn->first); - if (ito != oldEntries.end()) - { - if ((owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED) && !owner().eventsSuspended()) - { - if (itn->second.size != ito->second.size || itn->second.lastModified != ito->second.lastModified) - { - Poco::File f(itn->second.path); - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_MODIFIED); - owner().itemModified(&owner(), ev); - } - } - oldEntries.erase(ito); - } - else if ((owner().eventMask() & DirectoryWatcher::DW_ITEM_ADDED) && !owner().eventsSuspended()) - { - Poco::File f(itn->second.path); - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_ADDED); - owner().itemAdded(&owner(), ev); - } - } - if ((owner().eventMask() & DirectoryWatcher::DW_ITEM_REMOVED) && !owner().eventsSuspended()) - { - for (ItemInfoMap::iterator it = oldEntries.begin(); it != oldEntries.end(); ++it) - { - Poco::File f(it->second.path); - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_REMOVED); - owner().itemRemoved(&owner(), ev); - } - } - } - -private: - DirectoryWatcherStrategy(); - DirectoryWatcherStrategy(const DirectoryWatcherStrategy&); - DirectoryWatcherStrategy& operator = (const DirectoryWatcherStrategy&); - - DirectoryWatcher& _owner; -}; - - -#if POCO_OS == POCO_OS_WINDOWS_NT - - -class WindowsDirectoryWatcherStrategy: public DirectoryWatcherStrategy -{ -public: - WindowsDirectoryWatcherStrategy(DirectoryWatcher& owner): - DirectoryWatcherStrategy(owner) - { - _hStopped = CreateEventW(NULL, FALSE, FALSE, NULL); - if (!_hStopped) - throw SystemException("cannot create event"); - } - - ~WindowsDirectoryWatcherStrategy() - { - CloseHandle(_hStopped); - } - - void run() - { - ItemInfoMap entries; - scan(entries); - - DWORD filter = FILE_NOTIFY_CHANGE_FILE_NAME | FILE_NOTIFY_CHANGE_DIR_NAME; - if (owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED) - filter |= FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_LAST_WRITE; - - std::string path(owner().directory().path()); - HANDLE hChange = FindFirstChangeNotificationA(path.c_str(), FALSE, filter); - - if (hChange == INVALID_HANDLE_VALUE) - { - try - { - FileImpl::handleLastErrorImpl(path); - } - catch (Poco::Exception& exc) - { - owner().scanError(&owner(), exc); - } - return; - } - - bool stopped = false; - while (!stopped) - { - try - { - HANDLE h[2]; - h[0] = _hStopped; - h[1] = hChange; - switch (WaitForMultipleObjects(2, h, FALSE, INFINITE)) - { - case WAIT_OBJECT_0: - stopped = true; - break; - case WAIT_OBJECT_0 + 1: - { - ItemInfoMap newEntries; - scan(newEntries); - compare(entries, newEntries); - std::swap(entries, newEntries); - if (FindNextChangeNotification(hChange) == FALSE) - { - FileImpl::handleLastErrorImpl(path); - } - } - break; - default: - throw SystemException("failed to wait for directory changes"); - } - } - catch (Poco::Exception& exc) - { - owner().scanError(&owner(), exc); - } - } - FindCloseChangeNotification(hChange); - } - - void stop() - { - 
SetEvent(_hStopped); - } - - bool supportsMoveEvents() const - { - return false; - } - -private: - HANDLE _hStopped; -}; - - -#elif POCO_OS == POCO_OS_LINUX || POCO_OS == POCO_OS_ANDROID - - -class LinuxDirectoryWatcherStrategy: public DirectoryWatcherStrategy -{ -public: - LinuxDirectoryWatcherStrategy(DirectoryWatcher& owner): - DirectoryWatcherStrategy(owner), - _fd(-1), - _stopped(false) - { - _fd = inotify_init(); - if (_fd == -1) throw Poco::IOException("cannot initialize inotify", errno); - } - - ~LinuxDirectoryWatcherStrategy() - { - close(_fd); - } - - void run() - { - int mask = 0; - if (owner().eventMask() & DirectoryWatcher::DW_ITEM_ADDED) - mask |= IN_CREATE; - if (owner().eventMask() & DirectoryWatcher::DW_ITEM_REMOVED) - mask |= IN_DELETE; - if (owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED) - mask |= IN_MODIFY; - if (owner().eventMask() & DirectoryWatcher::DW_ITEM_MOVED_FROM) - mask |= IN_MOVED_FROM; - if (owner().eventMask() & DirectoryWatcher::DW_ITEM_MOVED_TO) - mask |= IN_MOVED_TO; - int wd = inotify_add_watch(_fd, owner().directory().path().c_str(), mask); - if (wd == -1) - { - try - { - FileImpl::handleLastErrorImpl(owner().directory().path()); - } - catch (Poco::Exception& exc) - { - owner().scanError(&owner(), exc); - } - } - - Poco::Buffer buffer(4096); - while (!_stopped.load(std::memory_order_relaxed)) - { - fd_set fds; - FD_ZERO(&fds); - FD_SET(_fd, &fds); - - struct timeval tv; - tv.tv_sec = 0; - tv.tv_usec = 200000; - - if (select(_fd + 1, &fds, NULL, NULL, &tv) == 1) - { - int n = read(_fd, buffer.begin(), buffer.size()); - int i = 0; - if (n > 0) - { - while (n > 0) - { - struct inotify_event* event = reinterpret_cast(buffer.begin() + i); - - if (event->len > 0) - { - if (!owner().eventsSuspended()) - { - Poco::Path p(owner().directory().path()); - p.makeDirectory(); - p.setFileName(event->name); - Poco::File f(p.toString()); - - if ((event->mask & IN_CREATE) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_ADDED)) - { - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_ADDED); - owner().itemAdded(&owner(), ev); - } - if ((event->mask & IN_DELETE) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_REMOVED)) - { - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_REMOVED); - owner().itemRemoved(&owner(), ev); - } - if ((event->mask & IN_MODIFY) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED)) - { - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_MODIFIED); - owner().itemModified(&owner(), ev); - } - if ((event->mask & IN_MOVED_FROM) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_MOVED_FROM)) - { - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_MOVED_FROM); - owner().itemMovedFrom(&owner(), ev); - } - if ((event->mask & IN_MOVED_TO) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_MOVED_TO)) - { - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_MOVED_TO); - owner().itemMovedTo(&owner(), ev); - } - } - } - - i += sizeof(inotify_event) + event->len; - n -= sizeof(inotify_event) + event->len; - } - } - } - } - } - - void stop() - { - _stopped.store(true, std::memory_order_relaxed); - } - - bool supportsMoveEvents() const - { - return true; - } - -private: - int _fd; - std::atomic _stopped; -}; - - -#elif POCO_OS == POCO_OS_MAC_OS_X || POCO_OS == POCO_OS_FREE_BSD - - -class BSDDirectoryWatcherStrategy: public DirectoryWatcherStrategy -{ -public: - BSDDirectoryWatcherStrategy(DirectoryWatcher& owner): - DirectoryWatcherStrategy(owner), - 
_queueFD(-1), - _dirFD(-1), - _stopped(false) - { - _dirFD = open(owner.directory().path().c_str(), O_EVTONLY); - if (_dirFD < 0) throw Poco::FileNotFoundException(owner.directory().path()); - _queueFD = kqueue(); - if (_queueFD < 0) - { - close(_dirFD); - throw Poco::SystemException("Cannot create kqueue", errno); - } - } - - ~BSDDirectoryWatcherStrategy() - { - close(_dirFD); - close(_queueFD); - } - - void run() - { - Poco::Timestamp lastScan; - ItemInfoMap entries; - scan(entries); - - while (!_stopped.load(std::memory_order_relaxed)) - { - struct timespec timeout; - timeout.tv_sec = 0; - timeout.tv_nsec = 200000000; - unsigned eventFilter = NOTE_WRITE; - struct kevent event; - struct kevent eventData; - EV_SET(&event, _dirFD, EVFILT_VNODE, EV_ADD | EV_CLEAR, eventFilter, 0, 0); - int nEvents = kevent(_queueFD, &event, 1, &eventData, 1, &timeout); - if (nEvents < 0 || eventData.flags == EV_ERROR) - { - try - { - FileImpl::handleLastErrorImpl(owner().directory().path()); - } - catch (Poco::Exception& exc) - { - owner().scanError(&owner(), exc); - } - } - else if (nEvents > 0 || ((owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED) && lastScan.isElapsed(owner().scanInterval()*1000000))) - { - ItemInfoMap newEntries; - scan(newEntries); - compare(entries, newEntries); - std::swap(entries, newEntries); - lastScan.update(); - } - } - } - - void stop() - { - _stopped.store(true, std::memory_order_relaxed); - } - - bool supportsMoveEvents() const - { - return false; - } - -private: - int _queueFD; - int _dirFD; - std::atomic _stopped; -}; - - -#else - - -class PollingDirectoryWatcherStrategy: public DirectoryWatcherStrategy -{ -public: - PollingDirectoryWatcherStrategy(DirectoryWatcher& owner): - DirectoryWatcherStrategy(owner) - { - } - - ~PollingDirectoryWatcherStrategy() - { - } - - void run() - { - ItemInfoMap entries; - scan(entries); - while (!_stopped.tryWait(1000*owner().scanInterval())) - { - try - { - ItemInfoMap newEntries; - scan(newEntries); - compare(entries, newEntries); - std::swap(entries, newEntries); - } - catch (Poco::Exception& exc) - { - owner().scanError(&owner(), exc); - } - } - } - - void stop() - { - _stopped.set(); - } - - bool supportsMoveEvents() const - { - return false; - } - -private: - Poco::Event _stopped; -}; - - -#endif - - -DirectoryWatcher::DirectoryWatcher(const std::string& path, int eventMask, int scanInterval): - _directory(path), - _eventMask(eventMask), - _scanInterval(scanInterval) -{ - init(); -} - - -DirectoryWatcher::DirectoryWatcher(const Poco::File& directory, int eventMask, int scanInterval): - _directory(directory), - _eventMask(eventMask), - _scanInterval(scanInterval) -{ - init(); -} - - -DirectoryWatcher::~DirectoryWatcher() -{ - try - { - stop(); - delete _pStrategy; - } - catch (...) 
- { - poco_unexpected(); - } -} - - -void DirectoryWatcher::suspendEvents() -{ - poco_assert (_eventsSuspended > 0); - - _eventsSuspended--; -} - - -void DirectoryWatcher::resumeEvents() -{ - _eventsSuspended++; -} - - -void DirectoryWatcher::init() -{ - if (!_directory.exists()) - throw Poco::FileNotFoundException(_directory.path()); - - if (!_directory.isDirectory()) - throw Poco::InvalidArgumentException("not a directory", _directory.path()); - -#if POCO_OS == POCO_OS_WINDOWS_NT - _pStrategy = new WindowsDirectoryWatcherStrategy(*this); -#elif POCO_OS == POCO_OS_LINUX || POCO_OS == POCO_OS_ANDROID - _pStrategy = new LinuxDirectoryWatcherStrategy(*this); -#elif POCO_OS == POCO_OS_MAC_OS_X || POCO_OS == POCO_OS_FREE_BSD - _pStrategy = new BSDDirectoryWatcherStrategy(*this); -#else - _pStrategy = new PollingDirectoryWatcherStrategy(*this); -#endif - _thread.start(*this); -} - - -void DirectoryWatcher::run() -{ - _pStrategy->run(); -} - - -void DirectoryWatcher::stop() -{ - _pStrategy->stop(); - _thread.join(); -} - - -bool DirectoryWatcher::supportsMoveEvents() const -{ - return _pStrategy->supportsMoveEvents(); -} - - -} // namespace Poco - - -#endif // POCO_NO_INOTIFY diff --git a/base/poco/Foundation/src/UTF32Encoding.cpp b/base/poco/Foundation/src/UTF32Encoding.cpp index ff07006a4fb..e600c5d9445 100644 --- a/base/poco/Foundation/src/UTF32Encoding.cpp +++ b/base/poco/Foundation/src/UTF32Encoding.cpp @@ -30,22 +30,22 @@ const char* UTF32Encoding::_names[] = const TextEncoding::CharacterMap UTF32Encoding::_charMap = { - /* 00 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 10 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 20 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 30 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 40 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 50 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 60 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 70 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 80 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* 90 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* a0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* b0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* c0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* d0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* e0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - /* f0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, + /* 00 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 10 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 20 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 30 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 40 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 50 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 60 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 70 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 80 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* 90 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* a0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 
-4, -4, -4, -4, + /* b0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* c0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* d0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* e0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + /* f0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, }; @@ -118,7 +118,7 @@ const TextEncoding::CharacterMap& UTF32Encoding::characterMap() const int UTF32Encoding::convert(const unsigned char* bytes) const { UInt32 uc; - unsigned char* p = (unsigned char*) &uc; + unsigned char* p = reinterpret_cast(&uc); *p++ = *bytes++; *p++ = *bytes++; *p++ = *bytes++; @@ -129,7 +129,7 @@ int UTF32Encoding::convert(const unsigned char* bytes) const ByteOrder::flipBytes(uc); } - return uc; + return safeToInt(uc); } @@ -138,7 +138,7 @@ int UTF32Encoding::convert(int ch, unsigned char* bytes, int length) const if (bytes && length >= 4) { UInt32 ch1 = _flipBytes ? ByteOrder::flipBytes((UInt32) ch) : (UInt32) ch; - unsigned char* p = (unsigned char*) &ch1; + unsigned char* p = reinterpret_cast(&ch1); *bytes++ = *p++; *bytes++ = *p++; *bytes++ = *p++; @@ -155,14 +155,14 @@ int UTF32Encoding::queryConvert(const unsigned char* bytes, int length) const if (length >= 4) { UInt32 uc; - unsigned char* p = (unsigned char*) &uc; + unsigned char* p = reinterpret_cast(&uc); *p++ = *bytes++; *p++ = *bytes++; *p++ = *bytes++; *p++ = *bytes++; if (_flipBytes) ByteOrder::flipBytes(uc); - return uc; + ret = safeToInt(uc); } return ret; diff --git a/base/poco/Util/src/XMLConfiguration.cpp b/base/poco/Util/src/XMLConfiguration.cpp index e0d363cc870..648084aa28e 100644 --- a/base/poco/Util/src/XMLConfiguration.cpp +++ b/base/poco/Util/src/XMLConfiguration.cpp @@ -18,6 +18,7 @@ #ifndef POCO_UTIL_NO_XMLCONFIGURATION +#include "Poco/String.h" #include "Poco/SAX/InputSource.h" #include "Poco/DOM/DOMParser.h" #include "Poco/DOM/Element.h" @@ -28,6 +29,8 @@ #include "Poco/NumberParser.h" #include "Poco/NumberFormatter.h" #include +#include +#include namespace Poco { @@ -275,8 +278,9 @@ void XMLConfiguration::enumerate(const std::string& key, Keys& range) const { if (pChild->nodeType() == Poco::XML::Node::ELEMENT_NODE) { - const std::string& nodeName = pChild->nodeName(); + std::string nodeName = pChild->nodeName(); size_t& count = keys[nodeName]; + replaceInPlace(nodeName, ".", "\\."); if (count) range.push_back(nodeName + "[" + NumberFormatter::format(count) + "]"); else @@ -379,7 +383,21 @@ Poco::XML::Node* XMLConfiguration::findNode(std::string::const_iterator& it, con { while (it != end && *it == _delim) ++it; std::string key; - while (it != end && *it != _delim && *it != '[') key += *it++; + while (it != end) + { + if (*it == '\\' && std::distance(it, end) > 1) + { + // Skip backslash, copy only the char after it + std::advance(it, 1); + key += *it++; + continue; + } + if (*it == _delim) + break; + if (*it == '[') + break; + key += *it++; + } return findNode(it, end, findElement(key, pNode, create), create); } } diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 1e0a82a1403..e5a8c064808 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54480) -SET(VERSION_MAJOR 23) -SET(VERSION_MINOR 11) +SET(VERSION_REVISION 54482) +SET(VERSION_MAJOR 24) +SET(VERSION_MINOR 1) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 13adae0e42fd48de600486fc5d4b64d39f80c43e) -SET(VERSION_DESCRIBE v23.11.1.1-testing) -SET(VERSION_STRING 23.11.1.1) +SET(VERSION_GITHASH a2faa65b080a587026c86844f3a20c74d23a86f8) +SET(VERSION_DESCRIBE v24.1.1.1-testing) +SET(VERSION_STRING 24.1.1.1) # end of autochange diff --git a/cmake/target.cmake b/cmake/target.cmake index 887f79bf24e..fb911ace7b5 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -12,6 +12,8 @@ elseif (CMAKE_SYSTEM_NAME MATCHES "FreeBSD") elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") set (OS_DARWIN 1) add_definitions(-D OS_DARWIN) + # For MAP_ANON/MAP_ANONYMOUS + add_definitions(-D _DARWIN_C_SOURCE) elseif (CMAKE_SYSTEM_NAME MATCHES "SunOS") set (OS_SUNOS 1) add_definitions(-D OS_SUNOS) @@ -42,10 +44,8 @@ if (CMAKE_CROSSCOMPILING) if (ARCH_AARCH64) # FIXME: broken dependencies set (ENABLE_GRPC OFF CACHE INTERNAL "") - set (ENABLE_SENTRY OFF CACHE INTERNAL "") elseif (ARCH_PPC64LE) set (ENABLE_GRPC OFF CACHE INTERNAL "") - set (ENABLE_SENTRY OFF CACHE INTERNAL "") elseif (ARCH_RISCV64) # RISC-V support is preliminary set (GLIBC_COMPATIBILITY OFF CACHE INTERNAL "") @@ -73,19 +73,5 @@ if (CMAKE_CROSSCOMPILING) message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!") endif () - if (USE_MUSL) - # use of undeclared identifier 'PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP' - set (ENABLE_SENTRY OFF CACHE INTERNAL "") - set (ENABLE_ODBC OFF CACHE INTERNAL "") - set (ENABLE_GRPC OFF CACHE INTERNAL "") - set (ENABLE_HDFS OFF CACHE INTERNAL "") - set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "") - # use of drand48_data - set (ENABLE_AZURE_BLOB_STORAGE OFF CACHE INTERNAL "") - endif () - - # Don't know why but CXX_STANDARD doesn't work for cross-compilation - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++20") - message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}") endif () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 3d3b9b72faf..1b5ba15187f 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -135,9 +135,9 @@ add_contrib (libuv-cmake libuv) add_contrib (liburing-cmake liburing) add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv add_contrib (cassandra-cmake cassandra) # requires: libuv +add_contrib (curl-cmake curl) +add_contrib (azure-cmake azure) # requires: curl if (NOT OS_DARWIN) - add_contrib (curl-cmake curl) - add_contrib (azure-cmake azure) # requires: curl add_contrib (sentry-native-cmake sentry-native) # requires: curl endif() add_contrib (fmtlib-cmake fmtlib) diff --git a/contrib/azure b/contrib/azure index 096049bf24f..060c54dfb0a 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 096049bf24fffafcaccc132b9367694532716731 +Subproject commit 060c54dfb0abe869c065143303a9d3e9c54c29e3 diff --git a/contrib/azure-cmake/CMakeLists.txt b/contrib/azure-cmake/CMakeLists.txt index bb44c993e79..0d2512c9e6e 100644 --- a/contrib/azure-cmake/CMakeLists.txt +++ b/contrib/azure-cmake/CMakeLists.txt @@ -8,37 +8,21 @@ endif() set(AZURE_DIR "${ClickHouse_SOURCE_DIR}/contrib/azure") set(AZURE_SDK_LIBRARY_DIR "${AZURE_DIR}/sdk") -file(GLOB AZURE_SDK_CORE_SRC +file(GLOB AZURE_SDK_SRC "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/*.cpp" "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/cryptography/*.cpp" "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/*.cpp" - 
"${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.hpp" "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/winhttp/*.cpp" "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/io/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/private/*.hpp" -) - -file(GLOB AZURE_SDK_IDENTITY_SRC + "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/tracing/*.cpp" "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/src/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/src/private/*.hpp" -) - -file(GLOB AZURE_SDK_STORAGE_COMMON_SRC - "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/private/*.cpp" -) - -file(GLOB AZURE_SDK_STORAGE_BLOBS_SRC "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/private/*.hpp" + "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/private/*.cpp" + "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/*.cpp" ) file(GLOB AZURE_SDK_UNIFIED_SRC - ${AZURE_SDK_CORE_SRC} - ${AZURE_SDK_IDENTITY_SRC} - ${AZURE_SDK_STORAGE_COMMON_SRC} - ${AZURE_SDK_STORAGE_BLOBS_SRC} + ${AZURE_SDK_SRC} ) set(AZURE_SDK_INCLUDES diff --git a/contrib/boringssl b/contrib/boringssl index 8061ac62d67..aa6d2f865a2 160000 --- a/contrib/boringssl +++ b/contrib/boringssl @@ -1 +1 @@ -Subproject commit 8061ac62d67953e61b793042e33baf1352e67510 +Subproject commit aa6d2f865a2eab01cf94f197e11e36b6de47b5b4 diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index 7e86352befc..e74629e57b3 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -10,7 +10,7 @@ set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/curl") set (SRCS "${LIBRARY_DIR}/lib/altsvc.c" "${LIBRARY_DIR}/lib/amigaos.c" - "${LIBRARY_DIR}/lib/asyn-thread.c" + "${LIBRARY_DIR}/lib/asyn-ares.c" "${LIBRARY_DIR}/lib/base64.c" "${LIBRARY_DIR}/lib/bufq.c" "${LIBRARY_DIR}/lib/bufref.c" @@ -165,13 +165,14 @@ target_compile_definitions (_curl PRIVATE libcurl_EXPORTS OS="${CMAKE_SYSTEM_NAME}" ) + target_include_directories (_curl SYSTEM PUBLIC "${LIBRARY_DIR}/include" "${LIBRARY_DIR}/lib" . # curl_config.h ) -target_link_libraries (_curl PRIVATE OpenSSL::SSL) +target_link_libraries (_curl PRIVATE OpenSSL::SSL ch_contrib::c-ares) # The library is large - avoid bloat (XXX: is it?) 
if (OMIT_HEAVY_DEBUG_SYMBOLS) diff --git a/contrib/curl-cmake/curl_config.h b/contrib/curl-cmake/curl_config.h index f56ba3eccd5..a38aa60fe6d 100644 --- a/contrib/curl-cmake/curl_config.h +++ b/contrib/curl-cmake/curl_config.h @@ -50,3 +50,4 @@ #define ENABLE_IPV6 #define USE_OPENSSL #define USE_THREADS_POSIX +#define USE_ARES diff --git a/contrib/libhdfs3 b/contrib/libhdfs3 index bdcb91354b1..b9598e60167 160000 --- a/contrib/libhdfs3 +++ b/contrib/libhdfs3 @@ -1 +1 @@ -Subproject commit bdcb91354b1c05b21e73043a112a6f1e3b013497 +Subproject commit b9598e6016720a7c088bfe85ce1fa0410f9d2103 diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt index 4278575fd7f..8cd951af746 100644 --- a/contrib/libhdfs3-cmake/CMakeLists.txt +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -26,6 +26,11 @@ ADD_DEFINITIONS(-D__STDC_FORMAT_MACROS) ADD_DEFINITIONS(-D_GNU_SOURCE) ADD_DEFINITIONS(-D_GLIBCXX_USE_NANOSLEEP) ADD_DEFINITIONS(-DHAVE_NANOSLEEP) + +if (USE_MUSL) + ADD_DEFINITIONS(-DSTRERROR_R_RETURN_INT) +endif () + set(HAVE_STEADY_CLOCK 1) set(HAVE_NESTED_EXCEPTION 1) SET(HAVE_BOOST_CHRONO 0) diff --git a/contrib/librdkafka b/contrib/librdkafka index 6f3b483426a..2d2aab6f5b7 160000 --- a/contrib/librdkafka +++ b/contrib/librdkafka @@ -1 +1 @@ -Subproject commit 6f3b483426a8c8ec950e27e446bec175cf8b553f +Subproject commit 2d2aab6f5b79db1cfca15d7bf0dee75d00d82082 diff --git a/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h b/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h index 9eabfaa50c8..c2faeb47cb1 100644 --- a/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h +++ b/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h @@ -270,7 +270,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); * * Whether iconv support is available */ -#if 1 +#if 0 #define LIBXML_ICONV_ENABLED #endif @@ -499,5 +499,3 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); } #endif /* __cplusplus */ #endif - - diff --git a/contrib/llvm-project b/contrib/llvm-project index e7b8befca85..1834e42289c 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit e7b8befca85c8b847614432dba250c22d35fbae0 +Subproject commit 1834e42289c58402c804a87be4d489892b88f3ec diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index 406bac73e90..d09060912d8 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -11,7 +11,9 @@ option (ENABLE_EMBEDDED_COMPILER "Enable support for JIT compilation during quer option (ENABLE_DWARF_PARSER "Enable support for DWARF input format (uses LLVM library)" ${ENABLE_DWARF_PARSER_DEFAULT}) -if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER) +option (ENABLE_BLAKE3 "Enable BLAKE3 function" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER AND NOT ENABLE_BLAKE3) message(STATUS "Not using LLVM") return() endif() @@ -26,61 +28,75 @@ set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm") # and llvm cannot be compiled with bundled libcxx and 20 standard. set (CMAKE_CXX_STANDARD 14) -# This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles. 
-set (REQUIRED_LLVM_LIBRARIES - LLVMExecutionEngine - LLVMRuntimeDyld - LLVMAsmPrinter - LLVMDebugInfoDWARF - LLVMGlobalISel - LLVMSelectionDAG - LLVMMCDisassembler - LLVMPasses - LLVMCodeGen - LLVMipo - LLVMBitWriter - LLVMInstrumentation - LLVMScalarOpts - LLVMAggressiveInstCombine - LLVMInstCombine - LLVMVectorize - LLVMTransformUtils - LLVMTarget - LLVMAnalysis - LLVMProfileData - LLVMObject - LLVMBitReader - LLVMCore - LLVMRemarks - LLVMBitstreamReader - LLVMMCParser - LLVMMC - LLVMBinaryFormat - LLVMDebugInfoCodeView - LLVMSupport - LLVMDemangle -) +if (ARCH_AMD64) + set (LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "") +elseif (ARCH_AARCH64) + set (LLVM_TARGETS_TO_BUILD "AArch64" CACHE INTERNAL "") +elseif (ARCH_PPC64LE) + set (LLVM_TARGETS_TO_BUILD "PowerPC" CACHE INTERNAL "") +elseif (ARCH_S390X) + set (LLVM_TARGETS_TO_BUILD "SystemZ" CACHE INTERNAL "") +elseif (ARCH_RISCV64) + set (LLVM_TARGETS_TO_BUILD "RISCV" CACHE INTERNAL "") +endif () + + +if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER) + # Only compiling blake3 + set (REQUIRED_LLVM_LIBRARIES LLVMSupport) +else() + # This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles. + set (REQUIRED_LLVM_LIBRARIES + LLVMExecutionEngine + LLVMRuntimeDyld + LLVMAsmPrinter + LLVMDebugInfoDWARF + LLVMGlobalISel + LLVMSelectionDAG + LLVMMCDisassembler + LLVMPasses + LLVMCodeGen + LLVMipo + LLVMBitWriter + LLVMInstrumentation + LLVMScalarOpts + LLVMAggressiveInstCombine + LLVMInstCombine + LLVMVectorize + LLVMTransformUtils + LLVMTarget + LLVMAnalysis + LLVMProfileData + LLVMObject + LLVMBitReader + LLVMCore + LLVMRemarks + LLVMBitstreamReader + LLVMMCParser + LLVMMC + LLVMBinaryFormat + LLVMDebugInfoCodeView + LLVMSupport + LLVMDemangle + ) + + if (ARCH_AMD64) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen) + elseif (ARCH_AARCH64) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) + elseif (ARCH_PPC64LE) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMPowerPCInfo LLVMPowerPCDesc LLVMPowerPCCodeGen) + elseif (ARCH_S390X) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMSystemZInfo LLVMSystemZDesc LLVMSystemZCodeGen) + elseif (ARCH_RISCV64) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMRISCVInfo LLVMRISCVDesc LLVMRISCVCodeGen) + endif () +endif() + # Skip useless "install" instructions from CMake: set (LLVM_INSTALL_TOOLCHAIN_ONLY 1 CACHE INTERNAL "") -if (ARCH_AMD64) - set (LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen) -elseif (ARCH_AARCH64) - set (LLVM_TARGETS_TO_BUILD "AArch64" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) -elseif (ARCH_PPC64LE) - set (LLVM_TARGETS_TO_BUILD "PowerPC" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMPowerPCInfo LLVMPowerPCDesc LLVMPowerPCCodeGen) -elseif (ARCH_S390X) - set (LLVM_TARGETS_TO_BUILD "SystemZ" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMSystemZInfo LLVMSystemZDesc LLVMSystemZCodeGen) -elseif (ARCH_RISCV64) - set (LLVM_TARGETS_TO_BUILD "RISCV" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMRISCVInfo LLVMRISCVDesc LLVMRISCVCodeGen) -endif () - message (STATUS "LLVM TARGETS TO BUILD ${LLVM_TARGETS_TO_BUILD}") set (CMAKE_INSTALL_RPATH "ON") # Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind diff --git 
a/contrib/mariadb-connector-c-cmake/CMakeLists.txt b/contrib/mariadb-connector-c-cmake/CMakeLists.txt index 18d1510a57b..4257828890f 100644 --- a/contrib/mariadb-connector-c-cmake/CMakeLists.txt +++ b/contrib/mariadb-connector-c-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -if(OS_LINUX AND TARGET OpenSSL::SSL) +if((OS_LINUX OR OS_DARWIN) AND TARGET OpenSSL::SSL) option(ENABLE_MYSQL "Enable MySQL" ${ENABLE_LIBRARIES}) else () option(ENABLE_MYSQL "Enable MySQL" FALSE) @@ -73,7 +73,7 @@ set(HAVE_SYS_TYPES_H 1) set(HAVE_SYS_UN_H 1) set(HAVE_UNISTD_H 1) set(HAVE_UTIME_H 1) -set(HAVE_UCONTEXT_H 1) +set(HAVE_UCONTEXT_H 0) set(HAVE_ALLOCA 1) set(HAVE_DLERROR 0) set(HAVE_DLOPEN 0) @@ -116,9 +116,13 @@ CONFIGURE_FILE(${CC_SOURCE_DIR}/include/ma_config.h.in CONFIGURE_FILE(${CC_SOURCE_DIR}/include/mariadb_version.h.in ${CC_BINARY_DIR}/include-public/mariadb_version.h) -if(WITH_SSL) +if (WITH_SSL) set(SYSTEM_LIBS ${SYSTEM_LIBS} ${SSL_LIBRARIES}) -endif() +endif () + +if (OS_DARWIN) + set(SYSTEM_LIBS ${SYSTEM_LIBS} iconv) +endif () function(REGISTER_PLUGIN) @@ -227,15 +231,8 @@ ${CC_SOURCE_DIR}/libmariadb/secure/openssl_crypt.c ${CC_BINARY_DIR}/libmariadb/ma_client_plugin.c ) -if(ICONV_INCLUDE_DIR) - include_directories(BEFORE ${ICONV_INCLUDE_DIR}) -endif() add_definitions(-DLIBICONV_PLUG) -if(WITH_DYNCOL) - set(LIBMARIADB_SOURCES ${LIBMARIADB_SOURCES} ${CC_SOURCE_DIR}/libmariadb/mariadb_dyncol.c) -endif() - set(LIBMARIADB_SOURCES ${LIBMARIADB_SOURCES} ${CC_SOURCE_DIR}/libmariadb/mariadb_async.c ${CC_SOURCE_DIR}/libmariadb/ma_context.c) diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index 7d7666dff87..c4220ba90ac 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -117,7 +117,7 @@ endif() add_definitions(-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX) -if (OS_LINUX OR OS_FREEBSD) +if ((OS_LINUX OR OS_FREEBSD) AND NOT USE_MUSL) add_definitions(-DROCKSDB_PTHREAD_ADAPTIVE_MUTEX) endif() diff --git a/contrib/sentry-native b/contrib/sentry-native index ae10fb8c224..bc359f86cbf 160000 --- a/contrib/sentry-native +++ b/contrib/sentry-native @@ -1 +1 @@ -Subproject commit ae10fb8c224c3f41571446e1ed7fd57b9e5e366b +Subproject commit bc359f86cbf0f73f6fd4b6bfb4ede0c1f8c9400f diff --git a/contrib/sentry-native-cmake/CMakeLists.txt b/contrib/sentry-native-cmake/CMakeLists.txt index 377f955f856..6364e75db28 100644 --- a/contrib/sentry-native-cmake/CMakeLists.txt +++ b/contrib/sentry-native-cmake/CMakeLists.txt @@ -13,6 +13,7 @@ set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/sentry-native") set (SRCS ${SRC_DIR}/vendor/mpack.c + ${SRC_DIR}/vendor/stb_sprintf.c ${SRC_DIR}/src/sentry_alloc.c ${SRC_DIR}/src/sentry_backend.c ${SRC_DIR}/src/sentry_core.c @@ -21,6 +22,7 @@ set (SRCS ${SRC_DIR}/src/sentry_json.c ${SRC_DIR}/src/sentry_logger.c ${SRC_DIR}/src/sentry_options.c + ${SRC_DIR}/src/sentry_os.c ${SRC_DIR}/src/sentry_random.c ${SRC_DIR}/src/sentry_ratelimiter.c ${SRC_DIR}/src/sentry_scope.c @@ -29,6 +31,7 @@ set (SRCS ${SRC_DIR}/src/sentry_string.c ${SRC_DIR}/src/sentry_sync.c ${SRC_DIR}/src/sentry_transport.c + ${SRC_DIR}/src/sentry_tracing.c ${SRC_DIR}/src/sentry_utils.c ${SRC_DIR}/src/sentry_uuid.c ${SRC_DIR}/src/sentry_value.c diff --git a/contrib/unixodbc-cmake/CMakeLists.txt b/contrib/unixodbc-cmake/CMakeLists.txt index 3317654cd67..6fbe8c14ebb 100644 --- a/contrib/unixodbc-cmake/CMakeLists.txt +++ b/contrib/unixodbc-cmake/CMakeLists.txt @@ -1,7 +1,7 @@ option (ENABLE_ODBC "Enable ODBC library" ${ENABLE_LIBRARIES}) -if (NOT OS_LINUX) 
+if (NOT OS_LINUX OR USE_MUSL) if (ENABLE_ODBC) - message(STATUS "ODBC is only supported on Linux") + message(STATUS "ODBC is only supported on Linux with dynamic linking") endif() set (ENABLE_ODBC OFF CACHE INTERNAL "") endif () diff --git a/docker/images.json b/docker/images.json index 1535715648c..d2f098f53d7 100644 --- a/docker/images.json +++ b/docker/images.json @@ -125,6 +125,7 @@ "docker/test/server-jepsen", "docker/test/sqllogic", "docker/test/sqltest", + "docker/test/clickbench", "docker/test/stateless" ] }, @@ -145,6 +146,10 @@ "name": "clickhouse/server-jepsen-test", "dependent": [] }, + "docker/test/clickbench": { + "name": "clickhouse/clickbench", + "dependent": [] + }, "docker/test/install/deb": { "name": "clickhouse/install-deb-test", "dependent": [] diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index b174dfde675..145f5d13cc2 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,8 +34,9 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.10.5.20" +ARG VERSION="23.12.1.1368" ARG PACKAGES="clickhouse-keeper" +ARG DIRECT_DOWNLOAD_URLS="" # user/group precreated explicitly with fixed uid/gid on purpose. # It is especially important for rootless containers: in that case entrypoint @@ -47,15 +48,27 @@ ARG PACKAGES="clickhouse-keeper" ARG TARGETARCH RUN arch=${TARGETARCH:-amd64} \ - && for package in ${PACKAGES}; do \ - ( \ - cd /tmp \ - && echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \ + && cd /tmp && rm -f /tmp/*tgz && rm -f /tmp/*tgz.sha512 |: \ + && if [ -n "${DIRECT_DOWNLOAD_URLS}" ]; then \ + echo "installing from provided urls with tgz packages: ${DIRECT_DOWNLOAD_URLS}" \ + && for url in $DIRECT_DOWNLOAD_URLS; do \ + echo "Get ${url}" \ + && wget -c -q "$url" \ + ; done \ + else \ + for package in ${PACKAGES}; do \ + cd /tmp \ + && echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \ && wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \ && wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz.sha512" \ - && sed 's:/output/:/tmp/:' < "${package}-${VERSION}-${arch}.tgz.sha512" | sha512sum -c \ - && tar xvzf "${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / \ - ) \ + ; done \ + fi \ + && cat *.tgz.sha512 | sha512sum -c \ + && for file in *.tgz; do \ + if [ -f "$file" ]; then \ + echo "Unpacking $file"; \ + tar xvzf "$file" --strip-components=1 -C /; \ + fi \ ; done \ && rm /tmp/*.tgz /install -r \ && addgroup -S -g 101 clickhouse \ diff --git a/docker/packager/README.md b/docker/packager/README.md index 3a91f9a63f0..e0b7f38ea58 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -3,10 +3,10 @@ compilers and build settings. 
Correctly configured Docker daemon is single depen Usage: -Build deb package with `clang-14` in `debug` mode: +Build deb package with `clang-17` in `debug` mode: ``` $ mkdir deb/test_output -$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --debug-build +$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-17 --debug-build $ ls -l deb/test_output -rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb -rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb @@ -17,11 +17,11 @@ $ ls -l deb/test_output ``` -Build ClickHouse binary with `clang-14` and `address` sanitizer in `relwithdebuginfo` +Build ClickHouse binary with `clang-17` and `address` sanitizer in `relwithdebuginfo` mode: ``` $ mkdir $HOME/some_clickhouse -$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-14 --sanitizer=address +$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-17 --sanitizer=address $ ls -l $HOME/some_clickhouse -rwxr-xr-x 1 root root 787061952 clickhouse lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 20fb97c80bb..1a99ab0d0b6 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -49,6 +49,7 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \ chmod 777 -R /rust && \ rustup toolchain install nightly-2023-07-04 && \ rustup default nightly-2023-07-04 && \ + rustup toolchain remove stable && \ rustup component add rust-src && \ rustup target add x86_64-unknown-linux-gnu && \ rustup target add aarch64-unknown-linux-gnu && \ diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index fd9bfcaabb2..b63643419fe 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -149,7 +149,7 @@ then mkdir -p "$PERF_OUTPUT" cp -r ../tests/performance "$PERF_OUTPUT" cp -r ../tests/config/top_level_domains "$PERF_OUTPUT" - cp -r ../docker/test/performance-comparison/config "$PERF_OUTPUT" ||: + cp -r ../tests/performance/scripts/config "$PERF_OUTPUT" ||: for SRC in /output/clickhouse*; do # Copy all clickhouse* files except packages and bridges [[ "$SRC" != *.* ]] && [[ "$SRC" != *-bridge ]] && \ @@ -160,7 +160,7 @@ then ln -sf clickhouse "$PERF_OUTPUT"/clickhouse-keeper fi - cp -r ../docker/test/performance-comparison "$PERF_OUTPUT"/scripts ||: + cp -r ../tests/performance/scripts "$PERF_OUTPUT"/scripts ||: prepare_combined_output "$PERF_OUTPUT" # We have to know the revision that corresponds to this binary build. 
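For reference, the package-install flow that the keeper and server Dockerfiles now implement (prefer DIRECT_DOWNLOAD_URLS when set, otherwise fetch the versioned tgz together with its checksum, verify, then unpack) can be read as the following standalone sketch. The repository URL, version, package list, and arch default are illustrative placeholders, and in this sketch checksum verification is only performed for repository downloads; the actual RUN steps differ in detail.

#!/bin/bash
# Sketch of the tgz install flow used by the keeper/server Dockerfiles (illustrative defaults below).
set -euo pipefail

REPOSITORY="${REPOSITORY:-https://packages.clickhouse.com/tgz/stable}"   # illustrative
VERSION="${VERSION:-24.1.1.1}"                                           # illustrative
PACKAGES="${PACKAGES:-clickhouse-keeper}"
DIRECT_DOWNLOAD_URLS="${DIRECT_DOWNLOAD_URLS:-}"
arch="${TARGETARCH:-amd64}"

cd /tmp && rm -f ./*.tgz ./*.tgz.sha512

if [ -n "$DIRECT_DOWNLOAD_URLS" ]; then
    # CI-built archives are taken exactly as provided (assumption: no checksum files accompany them).
    for url in $DIRECT_DOWNLOAD_URLS; do
        wget -c -q "$url"
    done
else
    # Released packages: fetch each archive together with its .sha512 file, then verify.
    for package in $PACKAGES; do
        wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz"
        wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz.sha512"
    done
    # The checksum files reference /output/, so rewrite the path before checking.
    cat ./*.tgz.sha512 | sed 's:/output/:/tmp/:' | sha512sum -c
fi

# Unpack whatever archives ended up in /tmp into the root filesystem.
for file in ./*.tgz; do
    if [ -f "$file" ]; then
        tar xzf "$file" --strip-components=1 -C /
    fi
done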
diff --git a/docker/packager/packager b/docker/packager/packager index b5bcbada1da..ade36a55591 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -145,6 +145,7 @@ def parse_env_variables( RISCV_SUFFIX = "-riscv64" S390X_SUFFIX = "-s390x" AMD64_COMPAT_SUFFIX = "-amd64-compat" + AMD64_MUSL_SUFFIX = "-amd64-musl" result = [] result.append("OUTPUT_DIR=/output") @@ -163,6 +164,7 @@ def parse_env_variables( is_cross_s390x = compiler.endswith(S390X_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX) + is_amd64_musl = compiler.endswith(AMD64_MUSL_SUFFIX) if is_cross_darwin: cc = compiler[: -len(DARWIN_SUFFIX)] @@ -232,6 +234,12 @@ def parse_env_variables( cc = compiler[: -len(AMD64_COMPAT_SUFFIX)] result.append("DEB_ARCH=amd64") cmake_flags.append("-DNO_SSE3_OR_HIGHER=1") + elif is_amd64_musl: + cc = compiler[: -len(AMD64_MUSL_SUFFIX)] + result.append("DEB_ARCH=amd64") + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-x86_64-musl.cmake" + ) else: cc = compiler result.append("DEB_ARCH=amd64") @@ -396,6 +404,7 @@ def parse_args() -> argparse.Namespace: "clang-17-riscv64", "clang-17-s390x", "clang-17-amd64-compat", + "clang-17-amd64-musl", "clang-17-freebsd", ), default="clang-17", diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index d4498abda6a..26d65eb3ccc 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,8 +32,9 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.10.5.20" +ARG VERSION="23.12.1.1368" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" +ARG DIRECT_DOWNLOAD_URLS="" # user/group precreated explicitly with fixed uid/gid on purpose. # It is especially important for rootless containers: in that case entrypoint @@ -43,15 +44,26 @@ ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # The same uid / gid (101) is used both for alpine and ubuntu. 
RUN arch=${TARGETARCH:-amd64} \ - && for package in ${PACKAGES}; do \ - ( \ - cd /tmp \ - && echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \ + && cd /tmp \ + && if [ -n "${DIRECT_DOWNLOAD_URLS}" ]; then \ + echo "installing from provided urls with tgz packages: ${DIRECT_DOWNLOAD_URLS}" \ + && for url in $DIRECT_DOWNLOAD_URLS; do \ + echo "Get ${url}" \ + && wget -c -q "$url" \ + ; done \ + else \ + for package in ${PACKAGES}; do \ + echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \ && wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \ && wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz.sha512" \ - && sed 's:/output/:/tmp/:' < "${package}-${VERSION}-${arch}.tgz.sha512" | sha512sum -c \ - && tar xvzf "${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / \ - ) \ + ; done \ + fi \ + && cat *.tgz.sha512 | sed 's:/output/:/tmp/:' | sha512sum -c \ + && for file in *.tgz; do \ + if [ -f "$file" ]; then \ + echo "Unpacking $file"; \ + tar xvzf "$file" --strip-components=1 -C /; \ + fi \ ; done \ && rm /tmp/*.tgz /install -r \ && addgroup -S -g 101 clickhouse \ diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 08e95cd535b..5b96b208b11 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -30,13 +30,14 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.10.5.20" +ARG VERSION="23.12.1.1368" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image # from debs created by CI build, for example: # docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://..." -t ... ARG deb_location_url="" +ARG DIRECT_DOWNLOAD_URLS="" # set non-empty single_binary_location_url to create docker image # from a single binary url (useful for non-standard builds - with sanitizers, for arm64). @@ -44,6 +45,18 @@ ARG single_binary_location_url="" ARG TARGETARCH +# install from direct URL +RUN if [ -n "${DIRECT_DOWNLOAD_URLS}" ]; then \ + echo "installing from custom predefined urls with deb packages: ${DIRECT_DOWNLOAD_URLS}" \ + && rm -rf /tmp/clickhouse_debs \ + && mkdir -p /tmp/clickhouse_debs \ + && for url in $DIRECT_DOWNLOAD_URLS; do \ + wget --progress=bar:force:noscroll "$url" -P /tmp/clickhouse_debs || exit 1 \ + ; done \ + && dpkg -i /tmp/clickhouse_debs/*.deb \ + && rm -rf /tmp/* ; \ + fi + # install from a web location with deb packages RUN arch="${TARGETARCH:-amd64}" \ && if [ -n "${deb_location_url}" ]; then \ @@ -83,7 +96,7 @@ RUN if ! 
clickhouse local -q "SELECT ''" > /dev/null 2>&1; then \ && GNUPGHOME="$GNUPGHOME" gpg --no-default-keyring \ --keyring /usr/share/keyrings/clickhouse-keyring.gpg \ --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754 \ - && rm -r "$GNUPGHOME" \ + && rm -rf "$GNUPGHOME" \ && chmod +r /usr/share/keyrings/clickhouse-keyring.gpg \ && echo "${REPOSITORY}" > /etc/apt/sources.list.d/clickhouse.list \ && echo "installing from repository: ${REPOSITORY}" \ diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index b55baa0e0fc..b48017fdacc 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -12,6 +12,7 @@ RUN apt-get update \ ripgrep \ zstd \ locales \ + sudo \ --yes --no-install-recommends # Sanitizer options for services (clickhouse-server) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 6e3721956c0..ea82e071112 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -21,7 +21,7 @@ EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "} # trace_log needs more columns for symbolization EXTRA_COLUMNS_TRACE_LOG="${EXTRA_COLUMNS} symbols Array(LowCardinality(String)), lines Array(LowCardinality(String)), " -EXTRA_COLUMNS_EXPRESSION_TRACE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x -> toLowCardinality(demangle(addressToSymbol(x))), trace) AS symbols, arrayMap(x -> toLowCardinality(addressToLine(x)), trace) AS lines" +EXTRA_COLUMNS_EXPRESSION_TRACE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x -> demangle(addressToSymbol(x)), trace)::Array(LowCardinality(String)) AS symbols, arrayMap(x -> addressToLine(x), trace)::Array(LowCardinality(String)) AS lines" function __set_connection_args diff --git a/docker/test/clickbench/Dockerfile b/docker/test/clickbench/Dockerfile new file mode 100644 index 00000000000..0b6b1736e03 --- /dev/null +++ b/docker/test/clickbench/Dockerfile @@ -0,0 +1,10 @@ +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG + +ENV TZ=Europe/Amsterdam +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +COPY *.sh / +COPY *.sql / + +CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/clickbench/create.sql b/docker/test/clickbench/create.sql new file mode 100644 index 00000000000..9f18a47474b --- /dev/null +++ b/docker/test/clickbench/create.sql @@ -0,0 +1,112 @@ +ATTACH TABLE hits UUID 'c449dfbf-ba06-4d13-abec-8396559eb955' +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT NOT NULL, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT NOT NULL, + Referer TEXT NOT NULL, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT NOT NULL, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel 
TEXT NOT NULL, + Params TEXT NOT NULL, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT NOT NULL, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT NOT NULL, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT NOT NULL, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT NOT NULL, + BrowserCountry TEXT NOT NULL, + SocialNetwork TEXT NOT NULL, + SocialAction TEXT NOT NULL, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT NOT NULL, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT NOT NULL, + ParamCurrency TEXT NOT NULL, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT NOT NULL, + OpenstatCampaignID TEXT NOT NULL, + OpenstatAdID TEXT NOT NULL, + OpenstatSourceID TEXT NOT NULL, + UTMSource TEXT NOT NULL, + UTMMedium TEXT NOT NULL, + UTMCampaign TEXT NOT NULL, + UTMContent TEXT NOT NULL, + UTMTerm TEXT NOT NULL, + FromTag TEXT NOT NULL, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL, + PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) +) +ENGINE = MergeTree +SETTINGS disk = disk(type = cache, path = '/dev/shm/clickhouse/', max_size = '16G', + disk = disk(type = web, endpoint = 'https://clickhouse-datasets-web.s3.us-east-1.amazonaws.com/')); diff --git a/docker/test/clickbench/queries.sql b/docker/test/clickbench/queries.sql new file mode 100644 index 00000000000..31f65fc898d --- /dev/null +++ b/docker/test/clickbench/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; +SELECT AVG(UserID) FROM hits; +SELECT COUNT(DISTINCT UserID) FROM hits; +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; +SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC 
LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), 
SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 
2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh new file mode 100755 index 00000000000..3d27a40bb74 --- /dev/null +++ b/docker/test/clickbench/run.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +SCRIPT_PID=$! +(sleep 1200 && kill -9 $SCRIPT_PID) & + +# shellcheck disable=SC1091 +source /setup_export_logs.sh + +# fail on errors, verbose and export all env variables +set -e -x -a + +dpkg -i package_folder/clickhouse-common-static_*.deb +dpkg -i package_folder/clickhouse-server_*.deb +dpkg -i package_folder/clickhouse-client_*.deb + +# A directory for cache +mkdir /dev/shm/clickhouse +chown clickhouse:clickhouse /dev/shm/clickhouse + +# Allow introspection functions, needed for sending the logs +echo " +profiles: + default: + allow_introspection_functions: 1 +" > /etc/clickhouse-server/users.d/allow_introspection_functions.yaml + +# Enable text_log +echo " +text_log: +" > /etc/clickhouse-server/config.d/text_log.yaml + +config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml + +clickhouse start + +# Wait for the server to start, but not for too long. +for _ in {1..100} +do + clickhouse-client --query "SELECT 1" && break + sleep 1 +done + +setup_logs_replication + +# Load the data + +clickhouse-client --time < /create.sql + +# Run the queries + +set +x + +TRIES=3 +QUERY_NUM=1 +while read -r query; do + echo -n "[" + for i in $(seq 1 $TRIES); do + RES=$(clickhouse-client --query_id "q${QUERY_NUM}-${i}" --time --format Null --query "$query" --progress 0 2>&1 ||:) + echo -n "${RES}" + [[ "$i" != "$TRIES" ]] && echo -n ", " + + echo "${QUERY_NUM},${i},${RES}" >> /test_output/test_results.tsv + done + echo "]," + + QUERY_NUM=$((QUERY_NUM + 1)) +done < /queries.sql + +set -x + +clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'hits' AND database = 'default'" + +clickhouse-client -q "system flush logs" ||: +stop_logs_replication +clickhouse stop + +mv /var/log/clickhouse-server/* /test_output/ + +echo -e "success\tClickBench finished" > /test_output/check_status.tsv diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index d3695ba2613..5af05034415 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -16,7 +16,7 @@ export LLVM_VERSION=${LLVM_VERSION:-17} # it being undefined. Also read it as array so that we can pass an empty list # of additional variable to cmake properly, and it doesn't generate an extra # empty parameter. -# Read it as CMAKE_FLAGS to not lose exported FASTTEST_CMAKE_FLAGS on subsequential launch +# Read it as CMAKE_FLAGS to not lose exported FASTTEST_CMAKE_FLAGS on subsequent launch read -ra CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}" # Run only matching tests. 
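A note on the watchdog at the top of the ClickBench run.sh above: $! expands to the PID of the most recently started background job, so before any job has been launched it is empty; the PID a script normally hands to such a self-destruct timer is its own, $$. A minimal standalone version of the pattern, assuming the same 20-minute budget:

#!/bin/bash
# Watchdog sketch: abort the whole run if it exceeds a hard time budget.
# $$ is this script's own PID; $! is only set after a background job has been launched.
TIME_BUDGET_SECONDS=1200   # assumption: same budget as the ClickBench script

(sleep "$TIME_BUDGET_SECONDS" && echo "Time budget exceeded, aborting" >&2 && kill -9 $$) &
WATCHDOG_PID=$!

# ... the actual benchmark work goes here ...
sleep 5

# Disarm the watchdog once the work finished within the budget.
kill "$WATCHDOG_PID" 2>/dev/null || true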
@@ -197,7 +197,7 @@ function run_cmake ( cd "$FASTTEST_BUILD" - cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER="clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="clang-${LLVM_VERSION}" "${CMAKE_LIBS_CONFIG[@]}" "${CMAKE_FLAGS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt" + cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER="clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="clang-${LLVM_VERSION}" -DCMAKE_TOOLCHAIN_FILE="${FASTTEST_SOURCE}/cmake/linux/toolchain-x86_64-musl.cmake" "${CMAKE_LIBS_CONFIG[@]}" "${CMAKE_FLAGS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt" ) } diff --git a/docker/test/integration/helper_container/Dockerfile b/docker/test/integration/helper_container/Dockerfile index 60adaea1796..49a3d3cd84b 100644 --- a/docker/test/integration/helper_container/Dockerfile +++ b/docker/test/integration/helper_container/Dockerfile @@ -1,7 +1,7 @@ # docker build -t clickhouse/integration-helper . # Helper docker container to run iptables without sudo -FROM alpine +FROM alpine:3.18 RUN apk add --no-cache -U iproute2 \ && for bin in iptables iptables-restore iptables-save; \ do ln -sf xtables-nft-multi "/sbin/$bin"; \ diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 458ca2b1da8..c795fbf0672 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -74,7 +74,7 @@ RUN python3 -m pip install --no-cache-dir \ delta-spark==2.3.0 \ dict2xml \ dicttoxml \ - docker \ + docker==6.1.3 \ docker-compose==1.29.2 \ grpcio \ grpcio-tools \ diff --git a/docker/test/integration/runner/compose/docker_compose_minio.yml b/docker/test/integration/runner/compose/docker_compose_minio.yml index 45e55e7a79c..4255a529f6d 100644 --- a/docker/test/integration/runner/compose/docker_compose_minio.yml +++ b/docker/test/integration/runner/compose/docker_compose_minio.yml @@ -34,7 +34,7 @@ services: # Empty container to run proxy resolver. resolver: - image: clickhouse/python-bottle + image: clickhouse/python-bottle:${DOCKER_PYTHON_BOTTLE_TAG:-latest} expose: - "8080" tty: true diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index d31663f9071..e4ced104445 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -39,18 +39,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -COPY * / +COPY run.sh / -# Bind everything to one NUMA node, if there's more than one. Theoretically the -# node #0 should be less stable because of system interruptions. We bind -# randomly to node 1 or 0 to gather some statistics on that. We have to bind -# both servers and the tmpfs on which the database is stored. How to do it -# is unclear, but by default tmpfs uses -# 'process allocation policy', not sure which process but hopefully the one that -# writes to it, so just bind the downloader script as well. -# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt -# Double-escaped backslashes are a tribute to the engineering wonder of docker -- -# it gives '/bin/sh: 1: [bash,: not found' otherwise. 
-CMD ["bash", "-c", "node=$((RANDOM % $(numactl --hardware | sed -n 's/^.*available:\\(.*\\)nodes.*$/\\1/p'))); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node /entrypoint.sh"] +CMD ["bash", "/run.sh"] # docker run --network=host --volume :/workspace --volume=:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/performance-comparison diff --git a/docker/test/performance-comparison/run.sh b/docker/test/performance-comparison/run.sh new file mode 100644 index 00000000000..7afb5da59b1 --- /dev/null +++ b/docker/test/performance-comparison/run.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +entry="/usr/share/clickhouse-test/performance/scripts/entrypoint.sh" +[ ! -e "$entry" ] && echo "ERROR: test scripts are not found" && exit 1 + +# Bind everything to one NUMA node, if there's more than one. Theoretically the +# node #0 should be less stable because of system interruptions. We bind +# randomly to node 1 or 0 to gather some statistics on that. We have to bind +# both servers and the tmpfs on which the database is stored. How to do it +# is unclear, but by default tmpfs uses +# 'process allocation policy', not sure which process but hopefully the one that +# writes to it, so just bind the downloader script as well. +# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt +# Double-escaped backslashes are a tribute to the engineering wonder of docker -- +# it gives '/bin/sh: 1: [bash,: not found' otherwise. +node=$(( RANDOM % $(numactl --hardware | sed -n 's/^.*available:\(.*\)nodes.*$/\1/p') )); +echo Will bind to NUMA node $node; +numactl --cpunodebind=$node --membind=$node $entry diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 5cf71e4d3f8..48457a99de3 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -20,7 +20,8 @@ RUN apt-get update --yes \ RUN pip3 install \ numpy \ pyodbc \ - deepdiff + deepdiff \ + sqlglot ARG odbc_repo="https://github.com/ClickHouse/clickhouse-odbc.git" @@ -35,7 +36,7 @@ RUN git clone --recursive ${odbc_repo} \ && odbcinst -i -s -l -f /clickhouse-odbc/packaging/odbc.ini.sample ENV TZ=Europe/Amsterdam -ENV MAX_RUN_TIME=900 +ENV MAX_RUN_TIME=9000 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git" diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh index db828741b0d..ccba344035e 100755 --- a/docker/test/sqllogic/run.sh +++ b/docker/test/sqllogic/run.sh @@ -75,6 +75,20 @@ function run_tests() cat /test_output/statements-test/check_status.tsv >> /test_output/check_status.tsv cat /test_output/statements-test/test_results.tsv >> /test_output/test_results.tsv tar -zcvf statements-check.tar.gz statements-test 1>/dev/null + + mkdir -p /test_output/complete-test + /clickhouse-tests/sqllogic/runner.py \ + --log-file /test_output/runner-complete-test.log \ + --log-level info \ + complete-test \ + --input-dir /sqllogictest \ + --out-dir /test_output/complete-test \ + 2>&1 \ + | ts '%Y-%m-%d %H:%M:%S' + + cat /test_output/complete-test/check_status.tsv >> /test_output/check_status.tsv + cat /test_output/complete-test/test_results.tsv >> /test_output/test_results.tsv + tar -zcvf complete-check.tar.gz complete-test 1>/dev/null fi } diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index ad3c3477b37..c9ce5697182 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -24,6 +24,28 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 
10000 --debug /azurite_log & config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml +cache_policy="" +if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then + cache_policy="SLRU" +else + cache_policy="LRU" +fi + +echo "Using cache policy: $cache_policy" + +if [ "$cache_policy" = "SLRU" ]; then + sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|LRU|SLRU|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp + mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml +fi + +if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + # It is not needed, we will explicitly create tables on s3. + # We do not have statefull tests with s3 storage run in public repository, but this is needed for another repository. + rm /etc/clickhouse-server/config.d/s3_storage_policy_for_merge_tree_by_default.xml +fi + function start() { if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then @@ -107,8 +129,76 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] else clickhouse-client --query "CREATE DATABASE test" clickhouse-client --query "SHOW TABLES FROM test" - clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" - clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" + if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, + EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, + UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, + RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), + URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, + FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, + UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, + MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, + SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, + ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, + SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, + FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, + HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, + GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, + HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, + HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, + FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, + LoadEventStartTiming 
Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, + RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, + ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, + OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, + UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, + URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, + ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), + IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, + VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, + Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, + EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, + AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), + RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, + SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, + ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, + SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, + UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, + FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, + FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, + Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, + BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), + Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32), + WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, + ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, + ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, + ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, + ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, + ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID 
Int32, + OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, + UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, + PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8, + PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16), + CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, + StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, + OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, + UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, + ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), + Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, + DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) + ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) + SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + + clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" + clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" + clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC" + clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC" + else + clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" + clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" + fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, 
IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" fi @@ -128,6 +218,10 @@ function run_tests() ADDITIONAL_OPTIONS+=('--replicated-database') fi + if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--s3-storage') + fi + if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--db-engine=Ordinary') fi @@ -135,7 +229,7 @@ function run_tests() set +e if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then - clickhouse-test --client="clickhouse-client --use_hedged_requests=0 --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \ + clickhouse-test --client="clickhouse-client --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \ --max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \ -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \ "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/docker/test/stateful/s3downloader b/docker/test/stateful/s3downloader index 96f2aa96dd5..77601fb5af6 100755 --- a/docker/test/stateful/s3downloader +++ b/docker/test/stateful/s3downloader @@ -30,7 +30,7 @@ def build_url(base_url, dataset): return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset]) -def dowload_with_progress(url, path): +def download_with_progress(url, path): logging.info("Downloading from %s to temp path %s", url, path) for i in range(RETRIES_COUNT): try: @@ -110,7 +110,7 @@ if __name__ == "__main__": temp_archive_path = 
_get_temp_file_name() try: download_url_for_dataset = build_url(args.url_prefix, dataset) - dowload_with_progress(download_url_for_dataset, temp_archive_path) + download_with_progress(download_url_for_dataset, temp_archive_path) unpack_to_clickhouse_directory(temp_archive_path, args.clickhouse_data_path) except Exception as ex: logging.info("Some exception occured %s", str(ex)) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index a2e2a708aaf..4e9486d7286 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -58,6 +58,7 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/users.d/s3_cache_new.xml + rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml fi # For flaky check we also enable thread fuzzer @@ -216,11 +217,11 @@ export -f run_tests if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. - timeout "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ + timeout_with_logging "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ | sed 's/All tests have finished//' | sed 's/No tests were run//' ||: fi -timeout "$MAX_RUN_TIME" bash -c run_tests ||: +timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||: echo "Files in current directory" ls -la ./ @@ -300,9 +301,6 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] rg -Fa "" /var/log/clickhouse-server/clickhouse-server2.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server2.log > /test_output/clickhouse-server2.log.zst ||: - # FIXME: remove once only github actions will be left - rm /var/log/clickhouse-server/clickhouse-server1.log - rm /var/log/clickhouse-server/clickhouse-server2.log mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: mv /var/log/clickhouse-server/stderr2.log /test_output/ ||: tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index 1204434d853..9b6ab535a90 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -35,4 +35,17 @@ function fn_exists() { declare -F "$1" > /dev/null; } +function timeout_with_logging() { + local exit_code=0 + + timeout "${@}" || exit_code="${?}" + + if [[ "${exit_code}" -eq "124" ]] + then + echo "The command 'timeout ${*}' has been killed by timeout" + fi + + return $exit_code +} + # vi: ft=bash diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index afc1a386a48..67056cc1bc1 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -65,9 +65,27 @@ chmod 777 -R /var/lib/clickhouse clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary" clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test" + stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log +# Randomize cache policies. 
+cache_policy="" +if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then + cache_policy="SLRU" +else + cache_policy="LRU" +fi + +echo "Using cache policy: $cache_policy" + +if [ "$cache_policy" = "SLRU" ]; then + sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|LRU|SLRU|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp + mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml +fi + start clickhouse-client --query "SHOW TABLES FROM datasets" @@ -191,6 +209,13 @@ sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \ > /etc/clickhouse-server/config.d/logger_trace.xml.tmp mv /etc/clickhouse-server/config.d/logger_trace.xml.tmp /etc/clickhouse-server/config.d/logger_trace.xml +if [ "$cache_policy" = "SLRU" ]; then + sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|LRU|SLRU|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp + mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml +fi + # Randomize async_load_databases if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then sudo echo "true" \ diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index 315efb9e6c4..cc6cb292b66 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -23,6 +23,7 @@ echo "Check submodules" | ts ./check-submodules |& tee /test_output/submodules_output.txt echo "Check shell scripts with shellcheck" | ts ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt + /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv echo "Check help for changelog generator works" | ts cd ../changelog || exit 1 diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 158ac19229e..f014fce49f6 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -77,6 +77,7 @@ remove_keeper_config "create_if_not_exists" "[01]" # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml +rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml @@ -115,6 +116,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml +rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml diff --git a/docs/changelogs/v23.11.1.2711-stable.md b/docs/changelogs/v23.11.1.2711-stable.md new file mode 100644 index 00000000000..e32dee41dc7 --- /dev/null +++ b/docs/changelogs/v23.11.1.2711-stable.md @@ -0,0 +1,525 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.11.1.2711-stable (05bc8ef1e02) FIXME as compared to v23.10.1.1976-stable (13adae0e42f) + +#### Backward Incompatible Change +* Formatters `%l`/`%k`/`%c` in function `parseDateTime()` are now able to parse hours/months without leading zeros, e.g. 
`select parseDateTime('2023-11-26 8:14', '%F %k:%i')` now works. Set `parsedatetime_parse_without_leading_zeros = 0` to restore the previous behavior which required two digits. Function `formatDateTime` is now also able to print hours/months without leading zeros. This is controlled by the setting `formatdatetime_format_without_leading_zeros`, which is off by default to not break existing use cases. [#55872](https://github.com/ClickHouse/ClickHouse/pull/55872) ([Azat Khuzhin](https://github.com/azat)). +* You can no longer use the aggregate function `avgWeighted` with arguments of type `Decimal`. Workaround: convert arguments to `Float64`. This closes [#43928](https://github.com/ClickHouse/ClickHouse/issues/43928). This closes [#31768](https://github.com/ClickHouse/ClickHouse/issues/31768). This closes [#56435](https://github.com/ClickHouse/ClickHouse/issues/56435). If you have used this function inside materialized views or projections with `Decimal` arguments, contact support@clickhouse.com. Fixed error in aggregate function `sumMap` and made it slower around 1.5..2 times. It does not matter because the function is garbage anyway. This closes [#54955](https://github.com/ClickHouse/ClickHouse/issues/54955). This closes [#53134](https://github.com/ClickHouse/ClickHouse/issues/53134). This closes [#55148](https://github.com/ClickHouse/ClickHouse/issues/55148). Fix a bug in function `groupArraySample` - it used the same random seed in case more than one aggregate state is generated in a query. [#56350](https://github.com/ClickHouse/ClickHouse/pull/56350) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The default ClickHouse server configuration file has enabled `access_management` (user manipulation by SQL queries) and `named_collection_control` (manipulation of named collections by SQL queries) for the `default` user by default. This closes [#56482](https://github.com/ClickHouse/ClickHouse/issues/56482). [#56619](https://github.com/ClickHouse/ClickHouse/pull/56619) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Multiple improvements for RESPECT/IGNORE NULLS. [#57189](https://github.com/ClickHouse/ClickHouse/pull/57189) ([Raúl Marín](https://github.com/Algunenano)). +* Remove the optimization `optimize_move_functions_out_of_any`. [#57190](https://github.com/ClickHouse/ClickHouse/pull/57190) ([Raúl Marín](https://github.com/Algunenano)). + +#### New Feature +* Added server setting `async_load_databases` for asynchronous loading of databases and tables. Speeds up the server start time. Applies to databases with Ordinary, Atomic and Replicated engines. Their tables load metadata asynchronously. A query to a table increases the priority of the load job and waits for it to be done. Added table `system.async_loader`. [#49351](https://github.com/ClickHouse/ClickHouse/pull/49351) ([Sergei Trifonov](https://github.com/serxa)). +* 1. Add function `extractPlainRanges` to `KeyCondition`. 2. Add some useful functions to `Range`. 3. Add `PlainRanges`, which represents a series of ranges that are ordered and non-overlapping. 4. Add `NumbersRangedSource`, which can accurately return user-selected numbers. [#50909](https://github.com/ClickHouse/ClickHouse/pull/50909) ([JackyWoo](https://github.com/JackyWoo)). +* Add system table `blob_storage_log`. [#52918](https://github.com/ClickHouse/ClickHouse/pull/52918) ([vdimir](https://github.com/vdimir)). +* Use statistics to order PREWHERE conditions better. [#53240](https://github.com/ClickHouse/ClickHouse/pull/53240) ([Han Fei](https://github.com/hanfei1991)).
+* Added a new aggregation function `groupArraySorted(n)(value)` which returns an array with the n first values from a field value sorted by itself. [#53562](https://github.com/ClickHouse/ClickHouse/pull/53562) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Added support for compression in keeper protocol. Can be enabled on clickhouse by using this flag `use_compression` inside `zookeeper`. resolves [#49507](https://github.com/ClickHouse/ClickHouse/issues/49507). [#54957](https://github.com/ClickHouse/ClickHouse/pull/54957) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Add ClickHouse setting to disable tunneling for HTTPS requests over HTTP proxy. [#55033](https://github.com/ClickHouse/ClickHouse/pull/55033) ([Arthur Passos](https://github.com/arthurpassos)). +* Introduce the feature `storage_metadata_write_full_object_key`. If it is set as `true` then metadata files are written with new format VERSION_FULL_OBJECT_KEY. With that format CH stores full remote object key in the metadata file. [#55566](https://github.com/ClickHouse/ClickHouse/pull/55566) ([Sema Checherinda](https://github.com/CheSema)). +* Add new settings and syntax to protect named collections' fields from being overridden. This is meant to prevent a malicious user from obtaining unauthorized access to secrets. [#55782](https://github.com/ClickHouse/ClickHouse/pull/55782) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Add `hostname` column to all system log tables;. [#55894](https://github.com/ClickHouse/ClickHouse/pull/55894) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `CHECK ALL TABLES` query. [#56022](https://github.com/ClickHouse/ClickHouse/pull/56022) ([vdimir](https://github.com/vdimir)). +* Added function `fromDaysSinceYearZero()` which is similar to MySQL's `FROM_DAYS`. E.g. `SELECT fromDaysSinceYearZero(739136)` returns `2023-09-08`. [#56088](https://github.com/ClickHouse/ClickHouse/pull/56088) ([Joanna Hulboj](https://github.com/jh0x)). +* Implemented series period detect method using FFT in pocketFFT lib. [#56171](https://github.com/ClickHouse/ClickHouse/pull/56171) ([Bhavna Jindal](https://github.com/bhavnajindal)). +* Add an external Python tool to view backups and to extract information from them without using ClickHouse. [#56268](https://github.com/ClickHouse/ClickHouse/pull/56268) ([Vitaly Baranov](https://github.com/vitlibar)). +* ... [#56275](https://github.com/ClickHouse/ClickHouse/pull/56275) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* This pull request implements new setting called `preferred_projection_name`. If it is set to a non-empty string, the specified projection would be used if possible. [#56309](https://github.com/ClickHouse/ClickHouse/pull/56309) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* S3 adaptive timeout means that first attempt made with low send and receive timeouts. [#56314](https://github.com/ClickHouse/ClickHouse/pull/56314) ([Sema Checherinda](https://github.com/CheSema)). +* Add 4-letter command for yielding/resigning leadership (https://github.com/ClickHouse/ClickHouse/issues/56352). [#56354](https://github.com/ClickHouse/ClickHouse/pull/56354) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Added a new SQL function, "arrayRandomSample(arr, k)" which returns a sample of k elements from the input array. Similar functionality could previously be achieved only with less convenient syntax, e.g. "SELECT arrayReduce('groupArraySample(3)', range(10))". 
[#56416](https://github.com/ClickHouse/ClickHouse/pull/56416) ([Robert Schulze](https://github.com/rschu1ze)). +* Added support for `float16` type data to use in `.npy` files. Closes [#56344](https://github.com/ClickHouse/ClickHouse/issues/56344). [#56424](https://github.com/ClickHouse/ClickHouse/pull/56424) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Added system view `information_schema.statistics` for better compatibility with Tableau Online. [#56425](https://github.com/ClickHouse/ClickHouse/pull/56425) ([Serge Klochkov](https://github.com/slvrtrn)). +* Add function `getClientHTTPHeader` for fetching values header values set in the HTTP request. [#56488](https://github.com/ClickHouse/ClickHouse/pull/56488) ([凌涛](https://github.com/lingtaolf)). +* Add a new table function named `fuzzJSON` with rows containing perturbed versions of the source JSON string with random variations. [#56490](https://github.com/ClickHouse/ClickHouse/pull/56490) ([Julia Kartseva](https://github.com/jkartseva)). +* Add `system.symbols` table useful for introspection of the binary. [#56548](https://github.com/ClickHouse/ClickHouse/pull/56548) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add 4-letter command for yielding/resigning leadership. [#56620](https://github.com/ClickHouse/ClickHouse/pull/56620) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Configurable dashboards. Queries for charts are now loaded using a query, which by default uses a new `system.dashboards` table. [#56771](https://github.com/ClickHouse/ClickHouse/pull/56771) ([Sergei Trifonov](https://github.com/serxa)). +* Introduce `fileCluster` table function. [#56868](https://github.com/ClickHouse/ClickHouse/pull/56868) ([Andrey Zvonov](https://github.com/zvonand)). +* Add `_size` virtual column with file size in bytes to `s3/file/hdfs/url/azureBlobStorage` engines. [#57126](https://github.com/ClickHouse/ClickHouse/pull/57126) ([Kruglov Pavel](https://github.com/Avogar)). +* Expose the number of errors occurred on a server since last restart from the Prometheus endpoint. [#57209](https://github.com/ClickHouse/ClickHouse/pull/57209) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added a new SQL function `sqid` to generate Sqids (https://sqids.org/), example: `SELECT sqid(125, 126)`. [#57442](https://github.com/ClickHouse/ClickHouse/pull/57442) ([awakeljw](https://github.com/awakeljw)). + +#### Performance Improvement +* Support window functions parallel evaluation. Fixes [#34688](https://github.com/ClickHouse/ClickHouse/issues/34688). [#39631](https://github.com/ClickHouse/ClickHouse/pull/39631) ([Dmitry Novik](https://github.com/novikd)). +* Increase the default value of `max_concurrent_queries` from 100 to 1000. This makes sense when there is a large number of connecting clients, which are slowly sending or receiving data, so the server is not limited by CPU, or when the number of CPU cores is larger than 100. Also, enable the concurrency control by default, and set the desired number of query processing threads in total as twice the number of CPU cores. It improves performance in scenarios with a very large number of concurrent queries. [#46927](https://github.com/ClickHouse/ClickHouse/pull/46927) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed filtering by `IN(...)` condition for `Merge` table engine. [#54905](https://github.com/ClickHouse/ClickHouse/pull/54905) ([Nikita Taranov](https://github.com/nickitat)). 
+* An improvement which takes place when cache is full and there are big reads. [#55158](https://github.com/ClickHouse/ClickHouse/pull/55158) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add ability to disable checksums for S3 to avoid excessive input file read (this new behavior could be enabled with `s3_disable_checksum=true`). [#55559](https://github.com/ClickHouse/ClickHouse/pull/55559) ([Azat Khuzhin](https://github.com/azat)). +* Now we read synchronously from remote tables when data is in page cache (like we do for local tables). It is faster, doesn't require synchronisation inside thread pool, doesn't hesitate to do `seek`-s on local fs and reduces cpu wait. [#55841](https://github.com/ClickHouse/ClickHouse/pull/55841) ([Nikita Taranov](https://github.com/nickitat)). +* ... This PR follows [#55929](https://github.com/ClickHouse/ClickHouse/issues/55929), it will bring about 30% speedup. - reduce the reserved memory - reduce the `resize` call. [#55957](https://github.com/ClickHouse/ClickHouse/pull/55957) ([lgbo](https://github.com/lgbo-ustc)). +* The performance experiments of **OnTime** on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring the improvements of **7.4%, 5.9%, 4.7%, 3.0%, and 4.6%** to the QPS of the query Q2, Q3, Q4, Q5 and Q6 respectively while having no impact on others. [#56079](https://github.com/ClickHouse/ClickHouse/pull/56079) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Limit the number of threads busy inside the query profiler. If there are more - they will skip profiling. [#56105](https://github.com/ClickHouse/ClickHouse/pull/56105) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* WindowTransform decrease amount of virtual function calls. [#56120](https://github.com/ClickHouse/ClickHouse/pull/56120) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow recursive tuple field pruning in ORC to speed up scaning. [#56122](https://github.com/ClickHouse/ClickHouse/pull/56122) ([李扬](https://github.com/taiyang-li)). +* This pull request provides countRows support for Npy data format. Now with setting `optimize_count_from_files=1` queries like `select count() from file(data.npy)` will work much more fast because of caching the results. [#56304](https://github.com/ClickHouse/ClickHouse/pull/56304) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Queries with aggregation and a large number of streams will use less amount of memory during the plan's construction. [#57074](https://github.com/ClickHouse/ClickHouse/pull/57074) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve performance of executing queries for use cases with many users. [#57106](https://github.com/ClickHouse/ClickHouse/pull/57106) ([Andrej Hoos](https://github.com/adikus)). +* Trivial improvement on array join, reuse some intermediate results. [#57183](https://github.com/ClickHouse/ClickHouse/pull/57183) ([李扬](https://github.com/taiyang-li)). +* There are cases when stack unwinding was slow. [#57221](https://github.com/ClickHouse/ClickHouse/pull/57221) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now we use default read pool for reading from external storage when `max_streams = 1`. It is beneficial when read prefetches are enabled. [#57334](https://github.com/ClickHouse/ClickHouse/pull/57334) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement +* Engine `Merge` filters the records according to the row policies of the underlying tables. 
[#50209](https://github.com/ClickHouse/ClickHouse/pull/50209) ([Ilya Golshtein](https://github.com/ilejn)). +* Add a setting `max_execution_time_leaf` to limit the execution time on shards for distributed queries, and `timeout_overflow_mode_leaf` to control the behaviour if a timeout happens. [#51823](https://github.com/ClickHouse/ClickHouse/pull/51823) ([Duc Canh Le](https://github.com/canhld94)). +* Fix possible PostgreSQL logical replication `conversion_error` when using MaterializedPostgreSQL. [#53721](https://github.com/ClickHouse/ClickHouse/pull/53721) ([takakawa](https://github.com/takakawa)). +* Set `background_fetches_pool_size` to 16 and `background_schedule_pool_size` to 512, which is better for production usage with frequent small insertions. [#54327](https://github.com/ClickHouse/ClickHouse/pull/54327) ([Denny Crane](https://github.com/den-crane)). +* When reading data from a CSV file in which a line ends with '\r' not followed by '\n', the exception ``` Cannot parse CSV format: found \r (CR) not followed by \n (LF). Line must end by \n (LF) or \r\n (CR LF) or \n\r.: ``` was encountered. In ClickHouse, a CSV end of line must be \n, \r\n or \n\r, so the \r must be followed by \n; but in some situations the CSV input data is abnormal and, as above, \r is at the end of a line. [#54340](https://github.com/ClickHouse/ClickHouse/pull/54340) ([KevinyhZou](https://github.com/KevinyhZou)). +* Update the Arrow library to release-13.0.0, which supports new encodings. Closes [#44505](https://github.com/ClickHouse/ClickHouse/issues/44505). [#54800](https://github.com/ClickHouse/ClickHouse/pull/54800) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve performance of ON CLUSTER queries by removing heavy system calls to get all network interfaces when looking for the local IP address in the DDL entry hosts list. [#54909](https://github.com/ClickHouse/ClickHouse/pull/54909) ([Duc Canh Le](https://github.com/canhld94)). +* Keeper improvement: improve memory usage during startup by delaying log preprocessing. [#55660](https://github.com/ClickHouse/ClickHouse/pull/55660) ([Antonio Andelic](https://github.com/antonio2368)). +* Fixed accounting of memory allocated before attaching a thread to a query or a user. [#56089](https://github.com/ClickHouse/ClickHouse/pull/56089) ([Nikita Taranov](https://github.com/nickitat)). +* ClickHouse Keeper reports its running availability zone at the `/keeper/availability-zone` path when running in an AWS environment. [#56104](https://github.com/ClickHouse/ClickHouse/pull/56104) ([Jianfei Hu](https://github.com/incfly)). +* Add support for LARGE_LIST with Arrow. [#56118](https://github.com/ClickHouse/ClickHouse/pull/56118) ([edef](https://github.com/edef1c)). +* Improved performance of glob matching for `file` and `hdfs` storages. [#56141](https://github.com/ClickHouse/ClickHouse/pull/56141) ([Andrey Zvonov](https://github.com/zvonand)). +* Allow manual compaction of `EmbeddedRocksDB` via `OPTIMIZE` query. [#56225](https://github.com/ClickHouse/ClickHouse/pull/56225) ([Azat Khuzhin](https://github.com/azat)). +* Posting lists in inverted indexes are now compressed, which reduces their size by 10-30%. [#56226](https://github.com/ClickHouse/ClickHouse/pull/56226) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Add ability to specify BlockBasedTableOptions for EmbeddedRocksDB. [#56264](https://github.com/ClickHouse/ClickHouse/pull/56264) ([Azat Khuzhin](https://github.com/azat)).
+* `SHOW COLUMNS` now displays MySQL's equivalent data type name when the connection was made through the MySQL protocol. Previously, this was the case when setting `use_mysql_types_in_show_columns = 1`. The setting is retained but made obsolete. [#56277](https://github.com/ClickHouse/ClickHouse/pull/56277) ([Robert Schulze](https://github.com/rschu1ze)). +* Fixed possible `The local set of parts of table doesn't look like the set of parts in ZooKeeper` error if the server was restarted just after `TRUNCATE` or `DROP PARTITION`. [#56282](https://github.com/ClickHouse/ClickHouse/pull/56282) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Parallelise `BackupEntriesCollector`. [#56312](https://github.com/ClickHouse/ClickHouse/pull/56312) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed handling of non-const query strings in functions `formatQuery()`/`formatQuerySingleLine()`. Also added `OrNull` variants of both functions that return NULL when a query cannot be parsed instead of throwing an exception. [#56327](https://github.com/ClickHouse/ClickHouse/pull/56327) ([Robert Schulze](https://github.com/rschu1ze)). +* Support creating and materializing an index in the same ALTER query; also support modifying TTL and materializing TTL in the same query. Closes [#55651](https://github.com/ClickHouse/ClickHouse/issues/55651). [#56331](https://github.com/ClickHouse/ClickHouse/pull/56331) ([flynn](https://github.com/ucasfl)). +* Enable adding a new disk to the storage configuration without a restart. [#56367](https://github.com/ClickHouse/ClickHouse/pull/56367) ([Duc Canh Le](https://github.com/canhld94)). +* Allow backup of a materialized view with a dropped inner table instead of failing the backup. [#56387](https://github.com/ClickHouse/ClickHouse/pull/56387) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Queries to `system.replicas` initiate requests to ZooKeeper when certain columns are queried. When there are thousands of tables these requests might produce a considerable load on ZooKeeper. If there are multiple simultaneous queries to `system.replicas` they make the same requests multiple times. The change is to "deduplicate" requests from concurrent queries. [#56420](https://github.com/ClickHouse/ClickHouse/pull/56420) ([Alexander Gololobov](https://github.com/davenger)). +* Add transition from reading key to reading quoted key when double quotes are found. [#56423](https://github.com/ClickHouse/ClickHouse/pull/56423) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix transfer query to MySQL compatible query. [#56456](https://github.com/ClickHouse/ClickHouse/pull/56456) ([flynn](https://github.com/ucasfl)). +* Add support for backing up and restoring tables using KeeperMap engine. [#56460](https://github.com/ClickHouse/ClickHouse/pull/56460) ([Antonio Andelic](https://github.com/antonio2368)). +* A 404 response for CompleteMultipartUpload has to be rechecked. The operation could have completed on the server even if the client got a timeout or other network errors, in which case the next retry of CompleteMultipartUpload receives a 404 response. If the object key exists, the operation is considered successful. [#56475](https://github.com/ClickHouse/ClickHouse/pull/56475) ([Sema Checherinda](https://github.com/CheSema)). +* Enable the HTTP OPTIONS method by default - it simplifies requesting ClickHouse from a web browser. [#56483](https://github.com/ClickHouse/ClickHouse/pull/56483) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The value for `dns_max_consecutive_failures` was changed by mistake in [#46550](https://github.com/ClickHouse/ClickHouse/issues/46550) - this is reverted and adjusted to a better value. Also, increased the HTTP keep-alive timeout to a reasonable value from production. [#56485](https://github.com/ClickHouse/ClickHouse/pull/56485) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Load base backups lazily (a base backup won't be loaded until it's needed). Also add some log messages and profile events for backups. [#56516](https://github.com/ClickHouse/ClickHouse/pull/56516) ([Vitaly Baranov](https://github.com/vitlibar)). +* Setting `query_cache_store_results_of_queries_with_nondeterministic_functions` (with values `false` or `true`) was marked obsolete. It was replaced by setting `query_cache_nondeterministic_function_handling`, a three-valued enum that controls how the query cache handles queries with non-deterministic functions: a) throw an exception (default behavior), b) save the non-deterministic query result regardless, or c) ignore, i.e. don't throw an exception and don't cache the result. [#56519](https://github.com/ClickHouse/ClickHouse/pull/56519) ([Robert Schulze](https://github.com/rschu1ze)). +* Rewrite equality with `is null` check in JOIN ON section. *Analyzer only*. [#56538](https://github.com/ClickHouse/ClickHouse/pull/56538) ([vdimir](https://github.com/vdimir)). +* Function `concat` now supports arbitrary argument types (instead of only String and FixedString arguments). This makes its behavior more similar to MySQL's `concat` implementation. For example, `SELECT concat('ab', 42)` now returns `ab42`. [#56540](https://github.com/ClickHouse/ClickHouse/pull/56540) ([Serge Klochkov](https://github.com/slvrtrn)). +* Allow getting cache configuration from the 'named_collection' section in the config or from an SQL-created named collection. [#56541](https://github.com/ClickHouse/ClickHouse/pull/56541) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update `query_masking_rules` when reloading the config ([#56449](https://github.com/ClickHouse/ClickHouse/issues/56449)). [#56573](https://github.com/ClickHouse/ClickHouse/pull/56573) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Make removeoutdatedtables() less aggressive when the PostgreSQL connection is unsuccessful. [#56609](https://github.com/ClickHouse/ClickHouse/pull/56609) ([jsc0218](https://github.com/jsc0218)). +* Currently, connecting to PostgreSQL takes too much time when the URL is not right, so the relevant query gets stuck there until it is cancelled. [#56648](https://github.com/ClickHouse/ClickHouse/pull/56648) ([jsc0218](https://github.com/jsc0218)). +* ClickHouse Keeper reports its running availability zone at the `/keeper/availability-zone` path. This can be configured in the server configuration (for example, `us-west-1a`). [#56715](https://github.com/ClickHouse/ClickHouse/pull/56715) ([Jianfei Hu](https://github.com/incfly)). +* Do not allow tables on different replicas to have different aggregate functions in SimpleAggregateFunction columns. [#56724](https://github.com/ClickHouse/ClickHouse/pull/56724) ([Duc Canh Le](https://github.com/canhld94)). +* Add support for the [well-known Protobuf types](https://protobuf.dev/reference/protobuf/google.protobuf/) in the Protobuf format. [#56741](https://github.com/ClickHouse/ClickHouse/pull/56741) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Keeper improvement: disable compressed logs by default in Keeper.
[#56763](https://github.com/ClickHouse/ClickHouse/pull/56763) ([Antonio Andelic](https://github.com/antonio2368)). +* Add config setting `wait_dictionaries_load_at_startup`. [#56782](https://github.com/ClickHouse/ClickHouse/pull/56782) ([Vitaly Baranov](https://github.com/vitlibar)). +* There was a potential vulnerability in previous ClickHouse versions: if a user has connected and unsuccessfully tried to authenticate with the "interserver secret" method, the server didn't terminate the connection immediately but continued to receive and ignore the leftover packets from the client. While these packets are ignored, they are still parsed, and if they use a compression method with another known vulnerability, it will lead to exploitation of it without authentication. This issue was found with [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#56794](https://github.com/ClickHouse/ClickHouse/pull/56794) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fetching a part now waits until that part is fully committed on the remote replica. It is better not to send a part in PreActive state; in case of zero copy this is a mandatory restriction. [#56808](https://github.com/ClickHouse/ClickHouse/pull/56808) ([Sema Checherinda](https://github.com/CheSema)). +* Implement user-level setting `alter_move_to_space_execute_async` which allows executing queries `ALTER TABLE ... MOVE PARTITION|PART TO DISK|VOLUME` asynchronously. The size of the pool for background executions is controlled by `background_move_pool_size`. The default behavior is synchronous execution. Fixes [#47643](https://github.com/ClickHouse/ClickHouse/issues/47643). [#56809](https://github.com/ClickHouse/ClickHouse/pull/56809) ([alesapin](https://github.com/alesapin)). +* Allow filtering by engine when scanning system.tables, avoiding unnecessary (potentially time-consuming) connections. [#56813](https://github.com/ClickHouse/ClickHouse/pull/56813) ([jsc0218](https://github.com/jsc0218)). +* Show `total_bytes` and `total_rows` in system tables for RocksDB storage. [#56816](https://github.com/ClickHouse/ClickHouse/pull/56816) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Allow basic commands in ALTER for TEMPORARY tables. [#56892](https://github.com/ClickHouse/ClickHouse/pull/56892) ([Sergey](https://github.com/icuken)). +* LZ4 compression: buffer the compressed block in a rare case when the output buffer capacity is not enough for writing the compressed block directly to the output buffer. [#56938](https://github.com/ClickHouse/ClickHouse/pull/56938) ([Sema Checherinda](https://github.com/CheSema)). +* Add metrics for the number of queued jobs, which is useful for the IO thread pool. [#56958](https://github.com/ClickHouse/ClickHouse/pull/56958) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a setting for the PostgreSQL table engine in the config file, a check for the setting, and documentation around the additional setting. [#56959](https://github.com/ClickHouse/ClickHouse/pull/56959) ([Peignon Melvyn](https://github.com/melvynator)). +* Run the interpreter with the `only_analyze` flag in the getsampleblock method. [#56972](https://github.com/ClickHouse/ClickHouse/pull/56972) ([Mikhail Artemenko](https://github.com/Michicosun)). +* Add a new `MergeTree` setting `add_implicit_sign_column_constraint_for_collapsing_engine` (disabled by default).
When enabled, it adds an implicit CHECK constraint for `CollapsingMergeTree` tables that restricts the value of the `Sign` column to be only -1 or 1. [#56701](https://github.com/ClickHouse/ClickHouse/issues/56701). [#56986](https://github.com/ClickHouse/ClickHouse/pull/56986) ([Kevin Mingtarja](https://github.com/kevinmingtarja)). +* Function `concat()` can now be called with a single argument, e.g., `SELECT concat('abc')`. This makes its behavior more consistent with MySQL's concat implementation. [#57000](https://github.com/ClickHouse/ClickHouse/pull/57000) ([Serge Klochkov](https://github.com/slvrtrn)). +* Signs all `x-amz-*` headers as required by AWS S3 docs. [#57001](https://github.com/ClickHouse/ClickHouse/pull/57001) ([Arthur Passos](https://github.com/arthurpassos)). +* Function `fromDaysSinceYearZero` (alias: `FROM_DAYS`) can now be used with unsigned and signed integer types (previously, it had to be an unsigned integer). This improve compatibility with 3rd party tools such as Tableau Online. [#57002](https://github.com/ClickHouse/ClickHouse/pull/57002) ([Serge Klochkov](https://github.com/slvrtrn)). +* Add system.s3queue_log to default config. [#57036](https://github.com/ClickHouse/ClickHouse/pull/57036) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Change the default for `wait_dictionaries_load_at_startup` to true, and use this setting only if `dictionaries_lazy_load` is false. [#57133](https://github.com/ClickHouse/ClickHouse/pull/57133) ([Vitaly Baranov](https://github.com/vitlibar)). +* Check dictionary source type on creation even if `dictionaries_lazy_load` is enabled. [#57134](https://github.com/ClickHouse/ClickHouse/pull/57134) ([Vitaly Baranov](https://github.com/vitlibar)). +* Plan-level optimizations can now be enabled/disabled individually. Previously, it was only possible to disable them all. The setting which previously did that (`query_plan_enable_optimizations`) is retained and can still be used to disable all optimizations. [#57152](https://github.com/ClickHouse/ClickHouse/pull/57152) ([Robert Schulze](https://github.com/rschu1ze)). +* The server's exit code will correspond to the exception code. For example, if the server cannot start due to memory limit, it will exit with the code 241 = MEMORY_LIMIT_EXCEEDED. In previous versions, the exit code for exceptions was always 70 = Poco::Util::ExitCode::EXIT_SOFTWARE. [#57153](https://github.com/ClickHouse/ClickHouse/pull/57153) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not demangle and symbolize stack frames from __functional c++ header. [#57201](https://github.com/ClickHouse/ClickHouse/pull/57201) ([Mike Kot](https://github.com/myrrc)). +* It is now possible to refer to ALIAS column in index (non-primary-key) definitions (issue [#55650](https://github.com/ClickHouse/ClickHouse/issues/55650)). Example: `CREATE TABLE tab(col UInt32, col_alias ALIAS col + 1, INDEX idx (col_alias) TYPE minmax) ENGINE = MergeTree ORDER BY col;`. [#57220](https://github.com/ClickHouse/ClickHouse/pull/57220) ([flynn](https://github.com/ucasfl)). +* HTTP server page `/dashboard` now supports charts with multiple lines. [#57236](https://github.com/ClickHouse/ClickHouse/pull/57236) ([Sergei Trifonov](https://github.com/serxa)). +* This pr gives possibility to use suffixes (K, M, G, T, E) along with the amount of memory to be used. Closes [#56879](https://github.com/ClickHouse/ClickHouse/issues/56879). 
[#57273](https://github.com/ClickHouse/ClickHouse/pull/57273) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Bumped Intel QPL (used by codec `DEFLATE_QPL`) from v1.2.0 to v1.3.1 . Also fixed a bug in case of BOF (Block On Fault) = 0, changed to handle page faults by falling back to SW path. [#57291](https://github.com/ClickHouse/ClickHouse/pull/57291) ([jasperzhu](https://github.com/jinjunzh)). +* Make alter materialized view non experimental and deprecate `allow_experimental_alter_materialized_view_structure` setting. Fixes [#15206](https://github.com/ClickHouse/ClickHouse/issues/15206). [#57311](https://github.com/ClickHouse/ClickHouse/pull/57311) ([alesapin](https://github.com/alesapin)). +* Increase default `replicated_deduplication_window` of MergeTree settings from 100 to 1k. [#57335](https://github.com/ClickHouse/ClickHouse/pull/57335) ([sichenzhao](https://github.com/sichenzhao)). +* Stop using `INCONSISTENT_METADATA_FOR_BACKUP` that much. If possible prefer to continue scanning instead of stopping and starting the scanning for backup from the beginning. [#57385](https://github.com/ClickHouse/ClickHouse/pull/57385) ([Vitaly Baranov](https://github.com/vitlibar)). +* Introduce the limit for the maximum number of table projections (default 25). [#57491](https://github.com/ClickHouse/ClickHouse/pull/57491) ([Julia Kartseva](https://github.com/jkartseva)). +* Enable `async_block_ids_cache` by default for `async_inserts` deduplication. [#57513](https://github.com/ClickHouse/ClickHouse/pull/57513) ([alesapin](https://github.com/alesapin)). + +#### Build/Testing/Packaging Improvement +* Enable temporary_data_in_cache in s3 tests in CI. [#48425](https://github.com/ClickHouse/ClickHouse/pull/48425) ([vdimir](https://github.com/vdimir)). +* Run sqllogic test. [#56078](https://github.com/ClickHouse/ClickHouse/pull/56078) ([Han Fei](https://github.com/hanfei1991)). +* Add a new build option `SANITIZE_COVERAGE`. If it is enabled, the code is instrumented to track the coverage. The collected information is available inside ClickHouse with: (1) a new function `coverage` that returns an array of unique addresses in the code found after the previous coverage reset; (2) `SYSTEM RESET COVERAGE` query that resets the accumulated data. This allows us to compare the coverage of different tests, including differential code coverage. Continuation of [#20539](https://github.com/ClickHouse/ClickHouse/issues/20539). [#56102](https://github.com/ClickHouse/ClickHouse/pull/56102) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* In [#54043](https://github.com/ClickHouse/ClickHouse/issues/54043) the setup plan started to appear in the logs. It should be only in the `runner_get_all_tests.log` only. As well, send the failed infrastructure event to CI db. [#56214](https://github.com/ClickHouse/ClickHouse/pull/56214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Some of the stack frames might not be resolved when collecting stacks. In such cases the raw address might be helpful. [#56267](https://github.com/ClickHouse/ClickHouse/pull/56267) ([Alexander Gololobov](https://github.com/davenger)). +* Add an option to disable libssh. [#56333](https://github.com/ClickHouse/ClickHouse/pull/56333) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add automatic check that there are no large translation units. [#56559](https://github.com/ClickHouse/ClickHouse/pull/56559) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Lower the size of the single-binary distribution. 
This closes [#55181](https://github.com/ClickHouse/ClickHouse/issues/55181). [#56617](https://github.com/ClickHouse/ClickHouse/pull/56617) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make `clickhouse-local` and `clickhouse-client` available under short names (`ch`, `chl`, `chc`) for usability. [#56634](https://github.com/ClickHouse/ClickHouse/pull/56634) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Information about the sizes of every translation unit and binary file after each build will be sent to the CI database in ClickHouse Cloud. This closes [#56107](https://github.com/ClickHouse/ClickHouse/issues/56107). [#56636](https://github.com/ClickHouse/ClickHouse/pull/56636) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Certain files of "Apache Arrow" library (which we use only for non-essential things like parsing the arrow format) were rebuilt all the time regardless of the build cache. This is fixed. [#56657](https://github.com/ClickHouse/ClickHouse/pull/56657) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid recompiling translation units depending on the autogenerated source file about version. [#56660](https://github.com/ClickHouse/ClickHouse/pull/56660) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not fetch changed submodules in the builder container. [#56689](https://github.com/ClickHouse/ClickHouse/pull/56689) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Tracing data of the linker invocations will be sent to the CI database in ClickHouse Cloud. [#56725](https://github.com/ClickHouse/ClickHouse/pull/56725) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Use DWARF 5 debug symbols for the clickhouse binary (was DWARF 4 previously). [#56770](https://github.com/ClickHouse/ClickHouse/pull/56770) ([Michael Kolupaev](https://github.com/al13n321)). +* Optimized build size further by removing unused code from external libraries. [#56786](https://github.com/ClickHouse/ClickHouse/pull/56786) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Set memory usage for client (`1G`) to address problems like this: https://s3.amazonaws.com/clickhouse-test-reports/0/f1bf3f1fc39f520871ec878d815e515e12fd3e7b/fuzzer_astfuzzertsan/report.html. [#56873](https://github.com/ClickHouse/ClickHouse/pull/56873) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* There was an attempt to have the proper listing in [#44311](https://github.com/ClickHouse/ClickHouse/issues/44311), but the fix itself was in the wrong place, so it's still broken. See an [example](https://github.com/ClickHouse/ClickHouse/actions/runs/6897342568/job/18781001022#step:8:25). [#56989](https://github.com/ClickHouse/ClickHouse/pull/56989) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fixed the memory leak in integration test of postgres dictionary. The case of network partition is not correctly handled at the time of the repo pulled years ago. [#57231](https://github.com/ClickHouse/ClickHouse/pull/57231) ([jsc0218](https://github.com/jsc0218)). +* Fix a test filename typo. [#57272](https://github.com/ClickHouse/ClickHouse/pull/57272) ([jsc0218](https://github.com/jsc0218)). +* Fix issue caught in https://github.com/docker-library/official-images/pull/15846. [#57571](https://github.com/ClickHouse/ClickHouse/pull/57571) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix analyzer - insertion from select with subquery referencing insertion table should process only insertion block. [#50857](https://github.com/ClickHouse/ClickHouse/pull/50857) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Setting JoinAlgorithm respect specified order [#51745](https://github.com/ClickHouse/ClickHouse/pull/51745) ([vdimir](https://github.com/vdimir)). +* Keeper `reconfig`: add timeout before yielding/taking leadership [#53481](https://github.com/ClickHouse/ClickHouse/pull/53481) ([Mike Kot](https://github.com/myrrc)). +* Fix incorrect header in grace hash join and filter pushdown [#53922](https://github.com/ClickHouse/ClickHouse/pull/53922) ([vdimir](https://github.com/vdimir)). +* Select from system tables when table based on table function. [#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* RFC: Fix "Cannot find column X in source stream" for Distributed queries with LIMIT BY [#55836](https://github.com/ClickHouse/ClickHouse/pull/55836) ([Azat Khuzhin](https://github.com/azat)). +* Fix 'Cannot read from file:' while running client in a background [#55976](https://github.com/ClickHouse/ClickHouse/pull/55976) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix clickhouse-local exit on bad send_logs_level setting [#55994](https://github.com/ClickHouse/ClickHouse/pull/55994) ([Kruglov Pavel](https://github.com/Avogar)). +* Bug fix explain ast with parameterized view [#56004](https://github.com/ClickHouse/ClickHouse/pull/56004) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix a crash during table loading on startup [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ClickHouse-sourced dictionaries with an explicit query [#56236](https://github.com/ClickHouse/ClickHouse/pull/56236) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix segfault in signal handler for Keeper [#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix incomplete query result for UNION in view() function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix inconsistency of "cast('0' as DateTime64(3))" and "cast('0' as Nullable(DateTime64(3)))" [#56286](https://github.com/ClickHouse/ClickHouse/pull/56286) ([李扬](https://github.com/taiyang-li)). +* Fix rare race condition related to Memory allocation failure [#56303](https://github.com/ClickHouse/ClickHouse/pull/56303) ([alesapin](https://github.com/alesapin)). +* Fix restore from backup with `flatten_nested` and `data_type_default_nullable` [#56306](https://github.com/ClickHouse/ClickHouse/pull/56306) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix crash in case of adding a column with type Object(JSON) [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix crash in filterPushDown [#56380](https://github.com/ClickHouse/ClickHouse/pull/56380) ([vdimir](https://github.com/vdimir)). +* Fix restore from backup with mat view and dropped source table [#56383](https://github.com/ClickHouse/ClickHouse/pull/56383) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix nullable primary key in final (2) [#56452](https://github.com/ClickHouse/ClickHouse/pull/56452) ([Amos Bird](https://github.com/amosbird)). +* Fix ON CLUSTER queries without database on initial node [#56484](https://github.com/ClickHouse/ClickHouse/pull/56484) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix startup failure due to TTL dependency [#56489](https://github.com/ClickHouse/ClickHouse/pull/56489) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ALTER COMMENT queries ON CLUSTER [#56491](https://github.com/ClickHouse/ClickHouse/pull/56491) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ALTER COLUMN with ALIAS [#56493](https://github.com/ClickHouse/ClickHouse/pull/56493) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix empty NAMED COLLECTIONs [#56494](https://github.com/ClickHouse/ClickHouse/pull/56494) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix two cases of projection analysis. [#56502](https://github.com/ClickHouse/ClickHouse/pull/56502) ([Amos Bird](https://github.com/amosbird)). +* Fix handling of aliases in query cache [#56545](https://github.com/ClickHouse/ClickHouse/pull/56545) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix conversion from `Nullable(Enum)` to `Nullable(String)` [#56644](https://github.com/ClickHouse/ClickHouse/pull/56644) ([Nikolay Degterinsky](https://github.com/evillique)). +* More reliable log handling in Keeper [#56670](https://github.com/ClickHouse/ClickHouse/pull/56670) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix configuration merge for nodes with substitution attributes [#56694](https://github.com/ClickHouse/ClickHouse/pull/56694) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Fix duplicate usage of table function input(). [#56695](https://github.com/ClickHouse/ClickHouse/pull/56695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix: RabbitMQ OpenSSL dynamic loading issue [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix crash in GCD codec in case when zeros present in data [#56704](https://github.com/ClickHouse/ClickHouse/pull/56704) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix 'mutex lock failed: Invalid argument' in clickhouse-local during insert into function [#56710](https://github.com/ClickHouse/ClickHouse/pull/56710) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix Date text parsing in optimistic path [#56765](https://github.com/ClickHouse/ClickHouse/pull/56765) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* DatabaseReplicated: fix DDL query timeout after recovering a replica [#56796](https://github.com/ClickHouse/ClickHouse/pull/56796) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix incorrect nullable columns reporting in MySQL binary protocol [#56799](https://github.com/ClickHouse/ClickHouse/pull/56799) ([Serge Klochkov](https://github.com/slvrtrn)). 
+* Support Iceberg metadata files for metastore tables [#56810](https://github.com/ClickHouse/ClickHouse/pull/56810) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix TSAN report under transform [#56817](https://github.com/ClickHouse/ClickHouse/pull/56817) ([Raúl Marín](https://github.com/Algunenano)). +* Fix SET query and SETTINGS formatting [#56825](https://github.com/ClickHouse/ClickHouse/pull/56825) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix failure to start due to table dependency in joinGet [#56828](https://github.com/ClickHouse/ClickHouse/pull/56828) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix flattening existing Nested columns during ADD COLUMN [#56830](https://github.com/ClickHouse/ClickHouse/pull/56830) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix allow cr end of line for csv [#56901](https://github.com/ClickHouse/ClickHouse/pull/56901) ([KevinyhZou](https://github.com/KevinyhZou)). +* Fix `tryBase64Decode()` with invalid input [#56913](https://github.com/ClickHouse/ClickHouse/pull/56913) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix generating deep nested columns in CapnProto/Protobuf schemas [#56941](https://github.com/ClickHouse/ClickHouse/pull/56941) ([Kruglov Pavel](https://github.com/Avogar)). +* Prevent incompatible ALTER of projection columns [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)). +* Fix sqlite file path validation [#56984](https://github.com/ClickHouse/ClickHouse/pull/56984) ([San](https://github.com/santrancisco)). +* S3Queue: fix metadata reference increment [#56990](https://github.com/ClickHouse/ClickHouse/pull/56990) ([Kseniia Sumarokova](https://github.com/kssenii)). +* S3Queue minor fix [#56999](https://github.com/ClickHouse/ClickHouse/pull/56999) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix file path validation for DatabaseFileSystem [#57029](https://github.com/ClickHouse/ClickHouse/pull/57029) ([San](https://github.com/santrancisco)). +* Fix `fuzzBits` with `ARRAY JOIN` [#57033](https://github.com/ClickHouse/ClickHouse/pull/57033) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix Nullptr dereference in partial merge join with joined_subquery_re… [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). +* Fix race condition in RemoteSource [#57052](https://github.com/ClickHouse/ClickHouse/pull/57052) ([Raúl Marín](https://github.com/Algunenano)). +* Implement `bitHammingDistance` for big integers [#57073](https://github.com/ClickHouse/ClickHouse/pull/57073) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* S3-style links bug fix [#57075](https://github.com/ClickHouse/ClickHouse/pull/57075) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix JSON_QUERY function with multiple numeric paths [#57096](https://github.com/ClickHouse/ClickHouse/pull/57096) ([KevinyhZou](https://github.com/KevinyhZou)). +* Fix buffer overflow in Gorilla codec [#57107](https://github.com/ClickHouse/ClickHouse/pull/57107) ([Nikolay Degterinsky](https://github.com/evillique)). +* Close interserver connection on any exception before authentication [#57142](https://github.com/ClickHouse/ClickHouse/pull/57142) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)). 
+* Fix incorrect JOIN plan optimization with partially materialized normal projection [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)). +* Ignore comments when comparing column descriptions [#57259](https://github.com/ClickHouse/ClickHouse/pull/57259) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `ReadonlyReplica` metric for all cases [#57267](https://github.com/ClickHouse/ClickHouse/pull/57267) ([Antonio Andelic](https://github.com/antonio2368)). +* Background merges correctly use temporary data storage in the cache [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)). +* Keeper fix for changelog and snapshots [#57299](https://github.com/ClickHouse/ClickHouse/pull/57299) ([Antonio Andelic](https://github.com/antonio2368)). +* Ignore finished ON CLUSTER tasks if hostname changed [#57339](https://github.com/ClickHouse/ClickHouse/pull/57339) ([Alexander Tokmakov](https://github.com/tavplubix)). +* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix function jsonMergePatch for partially const columns [#57379](https://github.com/ClickHouse/ClickHouse/pull/57379) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ubsan error in `Arena` [#57407](https://github.com/ClickHouse/ClickHouse/pull/57407) ([Nikita Taranov](https://github.com/nickitat)). +* fs cache: add limit for background download [#57424](https://github.com/ClickHouse/ClickHouse/pull/57424) ([Kseniia Sumarokova](https://github.com/kssenii)). +* bugfix: correctly parse SYSTEM STOP LISTEN TCP SECURE [#57483](https://github.com/ClickHouse/ClickHouse/pull/57483) ([joelynch](https://github.com/joelynch)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Add function `arrayRandomSample()`"'. [#56399](https://github.com/ClickHouse/ClickHouse/pull/56399) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Update README.md'. [#56549](https://github.com/ClickHouse/ClickHouse/pull/56549) ([Tyler Hannan](https://github.com/tylerhannan)). +* NO CL ENTRY: 'Revert "FunctionSleep exception message fix"'. [#56591](https://github.com/ClickHouse/ClickHouse/pull/56591) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Inserting only non-duplicate chunks in MV"'. [#56598](https://github.com/ClickHouse/ClickHouse/pull/56598) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Add new header for README with updated logo'. [#56607](https://github.com/ClickHouse/ClickHouse/pull/56607) ([Justin de Guzman](https://github.com/justindeguzman)). +* NO CL ENTRY: 'Revert "Add /keeper/availability-zone node to allow server load balancing within AZ."'. [#56610](https://github.com/ClickHouse/ClickHouse/pull/56610) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add 4-letter command for yielding/resigning leadership"'. [#56611](https://github.com/ClickHouse/ClickHouse/pull/56611) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'fix(docs): correct default value for output_format_parquet_compression_method to 'lz4''. [#56614](https://github.com/ClickHouse/ClickHouse/pull/56614) ([james-seymour-cubiko](https://github.com/james-seymour-cubiko)). +* NO CL ENTRY: 'Update except.md'. [#56651](https://github.com/ClickHouse/ClickHouse/pull/56651) ([rondo_1895](https://github.com/yangguang1991)). 
+* NO CL ENTRY: 'Revert "Add a setting max_execution_time_leaf to limit the execution time on shard for distributed query"'. [#56702](https://github.com/ClickHouse/ClickHouse/pull/56702) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Better except for SSL authentication failure"'. [#56844](https://github.com/ClickHouse/ClickHouse/pull/56844) ([Antonio Andelic](https://github.com/antonio2368)). +* NO CL ENTRY: 'Revert "s3 adaptive timeouts"'. [#56992](https://github.com/ClickHouse/ClickHouse/pull/56992) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Revert "s3 adaptive timeouts""'. [#56994](https://github.com/ClickHouse/ClickHouse/pull/56994) ([Sema Checherinda](https://github.com/CheSema)). +* NO CL ENTRY: 'Revert "Resubmit 01600_parts_types_metrics test (possibly without flakiness)"'. [#57163](https://github.com/ClickHouse/ClickHouse/pull/57163) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Mark select() as harmful function"'. [#57195](https://github.com/ClickHouse/ClickHouse/pull/57195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Update Sentry"'. [#57229](https://github.com/ClickHouse/ClickHouse/pull/57229) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add debugging info for 01600_parts_types_metrics on failures"'. [#57232](https://github.com/ClickHouse/ClickHouse/pull/57232) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Update date-time-functions.md"'. [#57329](https://github.com/ClickHouse/ClickHouse/pull/57329) ([Denny Crane](https://github.com/den-crane)). +* NO CL ENTRY: 'Revert "add function getClientHTTPHeader"'. [#57510](https://github.com/ClickHouse/ClickHouse/pull/57510) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add `sqid()` function"'. [#57511](https://github.com/ClickHouse/ClickHouse/pull/57511) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add new aggregation function groupArraySorted()"'. [#57519](https://github.com/ClickHouse/ClickHouse/pull/57519) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Implemented series period detect method using pocketfft lib"'. [#57536](https://github.com/ClickHouse/ClickHouse/pull/57536) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Support use alias column in indices"'. [#57537](https://github.com/ClickHouse/ClickHouse/pull/57537) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Remove useless "install" from CMake (step 1) [#36589](https://github.com/ClickHouse/ClickHouse/pull/36589) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Analyzer support 'is not distinct from' in join on section [#54068](https://github.com/ClickHouse/ClickHouse/pull/54068) ([vdimir](https://github.com/vdimir)). +* Refactor merge join transform [#55007](https://github.com/ClickHouse/ClickHouse/pull/55007) ([Alex Cheng](https://github.com/Alex-Cheng)). +* Add function jaccardIndex back with better performance [#55126](https://github.com/ClickHouse/ClickHouse/pull/55126) ([vdimir](https://github.com/vdimir)). +* Use more thread pools in BACKUP/RESTORE to avoid its hanging in tests [#55216](https://github.com/ClickHouse/ClickHouse/pull/55216) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Parallel replicas: progress bar [#55574](https://github.com/ClickHouse/ClickHouse/pull/55574) ([Igor Nikonov](https://github.com/devcrafter)). +* Analyzer: Fix result type after IfConstantConditionPass [#55951](https://github.com/ClickHouse/ClickHouse/pull/55951) ([Dmitry Novik](https://github.com/novikd)). +* RemoteSource: remove unnecessary flag [#55980](https://github.com/ClickHouse/ClickHouse/pull/55980) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix `REPLICA_ALREADY_EXISTS` for ReplicatedMergeTree [#56000](https://github.com/ClickHouse/ClickHouse/pull/56000) ([Nikolay Degterinsky](https://github.com/evillique)). +* Rework [#52159](https://github.com/ClickHouse/ClickHouse/issues/52159) to avoid coredump generation [#56039](https://github.com/ClickHouse/ClickHouse/pull/56039) ([Raúl Marín](https://github.com/Algunenano)). +* Bump gRPC to v1.47.5 [#56059](https://github.com/ClickHouse/ClickHouse/pull/56059) ([Robert Schulze](https://github.com/rschu1ze)). +* See what happens if we use less different docker images in integration tests [#56082](https://github.com/ClickHouse/ClickHouse/pull/56082) ([Raúl Marín](https://github.com/Algunenano)). +* Add missing zookeeper retries in StorageReplicatedMergeTree::backupData [#56131](https://github.com/ClickHouse/ClickHouse/pull/56131) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Better process broken parts on table start for replicated tables [#56142](https://github.com/ClickHouse/ClickHouse/pull/56142) ([alesapin](https://github.com/alesapin)). +* Add more details to "Data after merge is not byte-identical to data on another replicas" [#56164](https://github.com/ClickHouse/ClickHouse/pull/56164) ([Azat Khuzhin](https://github.com/azat)). +* Revert "Revert "Fix output/input of Arrow dictionary column"" [#56167](https://github.com/ClickHouse/ClickHouse/pull/56167) ([Kruglov Pavel](https://github.com/Avogar)). +* Add a log message for DatabaseReplicated [#56215](https://github.com/ClickHouse/ClickHouse/pull/56215) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Correct aggregate function cross tab accessors to be endianness-independent. [#56223](https://github.com/ClickHouse/ClickHouse/pull/56223) ([Austin Kothig](https://github.com/kothiga)). +* Fix client suggestions for user without grants [#56234](https://github.com/ClickHouse/ClickHouse/pull/56234) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix link to failed check report in status commit [#56243](https://github.com/ClickHouse/ClickHouse/pull/56243) ([vdimir](https://github.com/vdimir)). +* Analyzer: fix 01019_alter_materialized_view_consistent [#56246](https://github.com/ClickHouse/ClickHouse/pull/56246) ([vdimir](https://github.com/vdimir)). +* Properly process aliases for aggregation-by-partition optimization. [#56254](https://github.com/ClickHouse/ClickHouse/pull/56254) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* deltalake: Do not raise errors when processing add and remove actions [#56260](https://github.com/ClickHouse/ClickHouse/pull/56260) ([joelynch](https://github.com/joelynch)). +* Fix rare logical error in Replicated database [#56272](https://github.com/ClickHouse/ClickHouse/pull/56272) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update version_date.tsv and changelogs after v23.10.1.1976-stable [#56278](https://github.com/ClickHouse/ClickHouse/pull/56278) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Add assertion that `SizePredictor` is set if `preferred_block_size_bytes` is set [#56302](https://github.com/ClickHouse/ClickHouse/pull/56302) ([Nikita Taranov](https://github.com/nickitat)). +* Implement digest helpers for different objects [#56305](https://github.com/ClickHouse/ClickHouse/pull/56305) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Removed stale events from README [#56311](https://github.com/ClickHouse/ClickHouse/pull/56311) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix more tests with analyzer. [#56315](https://github.com/ClickHouse/ClickHouse/pull/56315) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Change some exception codes [#56316](https://github.com/ClickHouse/ClickHouse/pull/56316) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix using table shared id during backup and improve logs. [#56339](https://github.com/ClickHouse/ClickHouse/pull/56339) ([Vitaly Baranov](https://github.com/vitlibar)). +* Print info while decompressing the binary [#56360](https://github.com/ClickHouse/ClickHouse/pull/56360) ([Antonio Andelic](https://github.com/antonio2368)). +* remove unstable test test_heavy_insert_select_check_memory [#56369](https://github.com/ClickHouse/ClickHouse/pull/56369) ([Sema Checherinda](https://github.com/CheSema)). +* Update test_storage_s3_queue/test.py [#56370](https://github.com/ClickHouse/ClickHouse/pull/56370) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update 02735_system_zookeeper_connection.sql [#56374](https://github.com/ClickHouse/ClickHouse/pull/56374) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Cleanup convenience functions in IDataType [#56375](https://github.com/ClickHouse/ClickHouse/pull/56375) ([Robert Schulze](https://github.com/rschu1ze)). +* Update test_storage_s3_queue [#56376](https://github.com/ClickHouse/ClickHouse/pull/56376) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Minor improvements for S3Queue [#56377](https://github.com/ClickHouse/ClickHouse/pull/56377) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add obsolete setting back [#56382](https://github.com/ClickHouse/ClickHouse/pull/56382) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Rewrite jobs to use callable workflow [#56385](https://github.com/ClickHouse/ClickHouse/pull/56385) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update stress.py [#56388](https://github.com/ClickHouse/ClickHouse/pull/56388) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix rocksdb with analyzer. [#56391](https://github.com/ClickHouse/ClickHouse/pull/56391) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Option to check particular file with utils/check-style/check-doc-aspell [#56394](https://github.com/ClickHouse/ClickHouse/pull/56394) ([vdimir](https://github.com/vdimir)). +* Add a metric for suspicious parts in ZooKeeper [#56395](https://github.com/ClickHouse/ClickHouse/pull/56395) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix 02404_memory_bound_merging with analyzer. [#56419](https://github.com/ClickHouse/ClickHouse/pull/56419) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* move storage_metadata_write_full_object_key setting to the server scope [#56421](https://github.com/ClickHouse/ClickHouse/pull/56421) ([Sema Checherinda](https://github.com/CheSema)). +* Make autoscaling more responsive [#56422](https://github.com/ClickHouse/ClickHouse/pull/56422) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Fix `test_attach_without_fetching` [#56429](https://github.com/ClickHouse/ClickHouse/pull/56429) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Use `pcg` + `randomSeed()` instead of `std::mt19937`/`std::random_device` [#56430](https://github.com/ClickHouse/ClickHouse/pull/56430) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix test `02725_database_hdfs.sh` [#56457](https://github.com/ClickHouse/ClickHouse/pull/56457) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update the AMI receipt [#56459](https://github.com/ClickHouse/ClickHouse/pull/56459) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Make IMergeTreeDataPart::getState() inlinable [#56461](https://github.com/ClickHouse/ClickHouse/pull/56461) ([Alexander Gololobov](https://github.com/davenger)). +* Update version_date.tsv and changelogs after v23.10.2.13-stable [#56467](https://github.com/ClickHouse/ClickHouse/pull/56467) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.9.4.11-stable [#56468](https://github.com/ClickHouse/ClickHouse/pull/56468) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.8.6.16-lts [#56469](https://github.com/ClickHouse/ClickHouse/pull/56469) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.16.7-lts [#56470](https://github.com/ClickHouse/ClickHouse/pull/56470) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Disable randomization of allow_experimental_block_number_column flag [#56474](https://github.com/ClickHouse/ClickHouse/pull/56474) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Parallel clone sparse/shallow submodules [#56479](https://github.com/ClickHouse/ClickHouse/pull/56479) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix default port for Replicated database cluster [#56486](https://github.com/ClickHouse/ClickHouse/pull/56486) ([Nikolay Degterinsky](https://github.com/evillique)). +* Updated compression to LZ4 [#56497](https://github.com/ClickHouse/ClickHouse/pull/56497) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Analyzer remove unused projection columns [#56499](https://github.com/ClickHouse/ClickHouse/pull/56499) ([Maksim Kita](https://github.com/kitaisreal)). +* FunctionSleep exception message fix [#56500](https://github.com/ClickHouse/ClickHouse/pull/56500) ([Maksim Kita](https://github.com/kitaisreal)). +* Continue rewriting workflows to reusable tests [#56501](https://github.com/ClickHouse/ClickHouse/pull/56501) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Analyzer special functions projection names fix [#56514](https://github.com/ClickHouse/ClickHouse/pull/56514) ([Maksim Kita](https://github.com/kitaisreal)). +* CTE invalid query analysis add test [#56517](https://github.com/ClickHouse/ClickHouse/pull/56517) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix compilation of BackupsWorker.cpp [#56518](https://github.com/ClickHouse/ClickHouse/pull/56518) ([Vitaly Baranov](https://github.com/vitlibar)). +* Analyzer MoveFunctionsOutOfAnyPass refactoring [#56520](https://github.com/ClickHouse/ClickHouse/pull/56520) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer support EXPLAIN ESTIMATE [#56522](https://github.com/ClickHouse/ClickHouse/pull/56522) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Analyzer log used row policies [#56531](https://github.com/ClickHouse/ClickHouse/pull/56531) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer ORDER BY read in order query plan add test [#56532](https://github.com/ClickHouse/ClickHouse/pull/56532) ([Maksim Kita](https://github.com/kitaisreal)). +* ReplicatedMergeTree: check shutdown flags in retry loops [#56533](https://github.com/ClickHouse/ClickHouse/pull/56533) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix race between REPLACE_RANGE and GET_PART (set actual part name when fetching) [#56536](https://github.com/ClickHouse/ClickHouse/pull/56536) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Bump gRPC to v1.54.3 [#56543](https://github.com/ClickHouse/ClickHouse/pull/56543) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix flaky LDAP integration tests [#56544](https://github.com/ClickHouse/ClickHouse/pull/56544) ([Julian Maicher](https://github.com/jmaicher)). +* Remove useless using [#56546](https://github.com/ClickHouse/ClickHouse/pull/56546) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better warning message [#56547](https://github.com/ClickHouse/ClickHouse/pull/56547) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow `chassert` to guide the static analyzer [#56552](https://github.com/ClickHouse/ClickHouse/pull/56552) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove C++ templates [#56556](https://github.com/ClickHouse/ClickHouse/pull/56556) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `test_keeper_four_word_command/test.py::test_cmd_crst` [#56570](https://github.com/ClickHouse/ClickHouse/pull/56570) ([Antonio Andelic](https://github.com/antonio2368)). +* Delete unnecessary file from tests [#56572](https://github.com/ClickHouse/ClickHouse/pull/56572) ([vdimir](https://github.com/vdimir)). +* Analyzer: fix logical error with set in array join [#56587](https://github.com/ClickHouse/ClickHouse/pull/56587) ([vdimir](https://github.com/vdimir)). +* hide VERSION_INLINE_DATA under feature flag [#56594](https://github.com/ClickHouse/ClickHouse/pull/56594) ([Sema Checherinda](https://github.com/CheSema)). +* Fix 02554_fix_grouping_sets_predicate_push_down with analyzer. [#56595](https://github.com/ClickHouse/ClickHouse/pull/56595) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add "FunctionSleep exception message fix" again [#56597](https://github.com/ClickHouse/ClickHouse/pull/56597) ([Raúl Marín](https://github.com/Algunenano)). +* Update version_date.tsv and changelogs after v23.10.3.5-stable [#56606](https://github.com/ClickHouse/ClickHouse/pull/56606) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Remove bad test [#56612](https://github.com/ClickHouse/ClickHouse/pull/56612) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Symbolize `trace_log` for exporting [#56613](https://github.com/ClickHouse/ClickHouse/pull/56613) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add indices to exported system logs [#56615](https://github.com/ClickHouse/ClickHouse/pull/56615) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove dependencies [#56616](https://github.com/ClickHouse/ClickHouse/pull/56616) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* WIP: Add test describing MV deduplication issues [#56621](https://github.com/ClickHouse/ClickHouse/pull/56621) ([Jordi Villar](https://github.com/jrdi)). 
+* Add test for ROW POLICY ON CLUSTER [#56623](https://github.com/ClickHouse/ClickHouse/pull/56623) ([Nikolay Degterinsky](https://github.com/evillique)). +* Enable --secure flag for clickhouse-client for hostnames pointing to clickhouse cloud [#56638](https://github.com/ClickHouse/ClickHouse/pull/56638) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Continue with work from [#56621](https://github.com/ClickHouse/ClickHouse/issues/56621) [#56641](https://github.com/ClickHouse/ClickHouse/pull/56641) ([Jordi Villar](https://github.com/jrdi)). +* Switch to SSL port for clickhouse-client for hostnames pointing to clickhouse cloud [#56649](https://github.com/ClickHouse/ClickHouse/pull/56649) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Remove garbage from libssh [#56654](https://github.com/ClickHouse/ClickHouse/pull/56654) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Delete a file [#56655](https://github.com/ClickHouse/ClickHouse/pull/56655) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Delete a file (2) [#56656](https://github.com/ClickHouse/ClickHouse/pull/56656) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove some entries from `analyzer_tech_debt.txt` [#56658](https://github.com/ClickHouse/ClickHouse/pull/56658) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Miscellaneous [#56662](https://github.com/ClickHouse/ClickHouse/pull/56662) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Bump gRPC to v1.55.4 and protobuf to v22.5 [#56664](https://github.com/ClickHouse/ClickHouse/pull/56664) ([Robert Schulze](https://github.com/rschu1ze)). +* Small refactoring of AST hash calculation (follow-up to [#56545](https://github.com/ClickHouse/ClickHouse/issues/56545)) [#56665](https://github.com/ClickHouse/ClickHouse/pull/56665) ([Robert Schulze](https://github.com/rschu1ze)). +* Analyzer: filtering by virtual columns for StorageS3 [#56668](https://github.com/ClickHouse/ClickHouse/pull/56668) ([vdimir](https://github.com/vdimir)). +* Add back flaky tests to analyzer_tech_debt.txt [#56669](https://github.com/ClickHouse/ClickHouse/pull/56669) ([Raúl Marín](https://github.com/Algunenano)). +* gRPC: remove build dependency on systemd [#56671](https://github.com/ClickHouse/ClickHouse/pull/56671) ([Raúl Marín](https://github.com/Algunenano)). +* Remove unused code [#56677](https://github.com/ClickHouse/ClickHouse/pull/56677) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix missing argument for style_check.py in master workflow [#56691](https://github.com/ClickHouse/ClickHouse/pull/56691) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix unexpected parts handling [#56693](https://github.com/ClickHouse/ClickHouse/pull/56693) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Revert "Revert "Add a setting max_execution_time_leaf to limit the execution time on shard for distributed query"" [#56707](https://github.com/ClickHouse/ClickHouse/pull/56707) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix use_structure_from_insertion_table_in_table_functions with new Analyzer [#56708](https://github.com/ClickHouse/ClickHouse/pull/56708) ([Kruglov Pavel](https://github.com/Avogar)). +* Disable settings randomisation for `02896_memory_accounting_for_user.sh` [#56709](https://github.com/ClickHouse/ClickHouse/pull/56709) ([Nikita Taranov](https://github.com/nickitat)). 
+* Light autogenerated file [#56720](https://github.com/ClickHouse/ClickHouse/pull/56720) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Less CMake checks [#56721](https://github.com/ClickHouse/ClickHouse/pull/56721) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove orphan header files [#56722](https://github.com/ClickHouse/ClickHouse/pull/56722) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Try to fix hang in 01104_distributed_numbers_test [#56764](https://github.com/ClickHouse/ClickHouse/pull/56764) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Test RabbitMQ with secure connection [#56767](https://github.com/ClickHouse/ClickHouse/pull/56767) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix flaky test_replicated_merge_tree_encryption_codec. [#56768](https://github.com/ClickHouse/ClickHouse/pull/56768) ([Vitaly Baranov](https://github.com/vitlibar)). +* fix typo in ClickHouseDictionarySource [#56776](https://github.com/ClickHouse/ClickHouse/pull/56776) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Fix pygithub [#56778](https://github.com/ClickHouse/ClickHouse/pull/56778) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add test for avoided recursion [#56785](https://github.com/ClickHouse/ClickHouse/pull/56785) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix randomization of Keeper configs in stress tests [#56788](https://github.com/ClickHouse/ClickHouse/pull/56788) ([Antonio Andelic](https://github.com/antonio2368)). +* Try fix `No user in current context, it's a bug` [#56789](https://github.com/ClickHouse/ClickHouse/pull/56789) ([Antonio Andelic](https://github.com/antonio2368)). +* Update avg_weighted.xml [#56797](https://github.com/ClickHouse/ClickHouse/pull/56797) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Better except for SSL authentication failure [#56811](https://github.com/ClickHouse/ClickHouse/pull/56811) ([Nikolay Degterinsky](https://github.com/evillique)). +* More stable `test_keeper_reconfig_replace_leader` [#56835](https://github.com/ClickHouse/ClickHouse/pull/56835) ([Antonio Andelic](https://github.com/antonio2368)). +* Add cancellation hook for moving background operation [#56846](https://github.com/ClickHouse/ClickHouse/pull/56846) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Updated comment in universal.sh [#56852](https://github.com/ClickHouse/ClickHouse/pull/56852) ([Robert Schulze](https://github.com/rschu1ze)). +* Bump gRPC to v1.59 and protobuf to v24.4 [#56853](https://github.com/ClickHouse/ClickHouse/pull/56853) ([Robert Schulze](https://github.com/rschu1ze)). +* Better exception messages [#56854](https://github.com/ClickHouse/ClickHouse/pull/56854) ([Antonio Andelic](https://github.com/antonio2368)). +* Sparse checkout: Use `--remote` for `git submodule update` [#56857](https://github.com/ClickHouse/ClickHouse/pull/56857) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Fix `test_keeper_broken_logs` [#56858](https://github.com/ClickHouse/ClickHouse/pull/56858) ([Antonio Andelic](https://github.com/antonio2368)). +* CMake: Small cleanup in cpu_features.cmake [#56861](https://github.com/ClickHouse/ClickHouse/pull/56861) ([Robert Schulze](https://github.com/rschu1ze)). +* Planner support transactions [#56867](https://github.com/ClickHouse/ClickHouse/pull/56867) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Improve diagnostics in test 02908_many_requests_to_system_replicas [#56869](https://github.com/ClickHouse/ClickHouse/pull/56869) ([Alexander Gololobov](https://github.com/davenger)). +* Update 01052_window_view_proc_tumble_to_now.sh [#56870](https://github.com/ClickHouse/ClickHouse/pull/56870) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Call cache check a bit more often [#56872](https://github.com/ClickHouse/ClickHouse/pull/56872) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update test_storage_s3_queue/test.py [#56874](https://github.com/ClickHouse/ClickHouse/pull/56874) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix perf tests report when there are no tests [#56881](https://github.com/ClickHouse/ClickHouse/pull/56881) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove ctest [#56894](https://github.com/ClickHouse/ClickHouse/pull/56894) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Simpler CMake [#56898](https://github.com/ClickHouse/ClickHouse/pull/56898) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* test for [#56790](https://github.com/ClickHouse/ClickHouse/issues/56790) [#56899](https://github.com/ClickHouse/ClickHouse/pull/56899) ([Denny Crane](https://github.com/den-crane)). +* Allow delegate disk to handle retries for createDirectories [#56905](https://github.com/ClickHouse/ClickHouse/pull/56905) ([Alexander Gololobov](https://github.com/davenger)). +* Update version_date.tsv and changelogs after v23.10.4.25-stable [#56906](https://github.com/ClickHouse/ClickHouse/pull/56906) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.17.13-lts [#56907](https://github.com/ClickHouse/ClickHouse/pull/56907) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.8.7.24-lts [#56908](https://github.com/ClickHouse/ClickHouse/pull/56908) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.9.5.29-stable [#56909](https://github.com/ClickHouse/ClickHouse/pull/56909) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Remove outdated instructions [#56911](https://github.com/ClickHouse/ClickHouse/pull/56911) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid dependencies with no fixed versions [#56914](https://github.com/ClickHouse/ClickHouse/pull/56914) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix race on zk_log initialization [#56915](https://github.com/ClickHouse/ClickHouse/pull/56915) ([Alexander Gololobov](https://github.com/davenger)). +* Check what will happen if I remove some lines [#56916](https://github.com/ClickHouse/ClickHouse/pull/56916) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update fasttest [#56919](https://github.com/ClickHouse/ClickHouse/pull/56919) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make some tests independent of macro settings [#56927](https://github.com/ClickHouse/ClickHouse/pull/56927) ([Raúl Marín](https://github.com/Algunenano)). +* Fix flaky 02494_query_cache_events [#56935](https://github.com/ClickHouse/ClickHouse/pull/56935) ([Robert Schulze](https://github.com/rschu1ze)). +* Add CachedReadBufferReadFromCache{Hits,Misses} profile events [#56936](https://github.com/ClickHouse/ClickHouse/pull/56936) ([Jordi Villar](https://github.com/jrdi)). 
+* Send fatal logs by default in clickhouse-local [#56956](https://github.com/ClickHouse/ClickHouse/pull/56956) ([Nikolay Degterinsky](https://github.com/evillique)). +* Resubmit: Better except for SSL authentication [#56957](https://github.com/ClickHouse/ClickHouse/pull/56957) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix `test_keeper_auth` [#56960](https://github.com/ClickHouse/ClickHouse/pull/56960) ([Antonio Andelic](https://github.com/antonio2368)). +* Fewer concurrent requests in 02908_many_requests_to_system_replicas [#56968](https://github.com/ClickHouse/ClickHouse/pull/56968) ([Alexander Gololobov](https://github.com/davenger)). +* Own CMake for GRPC [#56971](https://github.com/ClickHouse/ClickHouse/pull/56971) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix build in Backups/BackupIO_S3.cpp [#56974](https://github.com/ClickHouse/ClickHouse/pull/56974) ([Robert Schulze](https://github.com/rschu1ze)). +* Add exclude for tryBase64Decode to backward compat test (follow-up to [#56913](https://github.com/ClickHouse/ClickHouse/issues/56913)) [#56975](https://github.com/ClickHouse/ClickHouse/pull/56975) ([Robert Schulze](https://github.com/rschu1ze)). +* Prefer sccache to ccache by default [#56980](https://github.com/ClickHouse/ClickHouse/pull/56980) ([Igor Nikonov](https://github.com/devcrafter)). +* update 02003_memory_limit_in_client.sh [#56981](https://github.com/ClickHouse/ClickHouse/pull/56981) ([Bharat Nallan](https://github.com/bharatnc)). +* Make check for the limited cmake dependencies the part of sparse checkout [#56991](https://github.com/ClickHouse/ClickHouse/pull/56991) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix flaky and slow tests. [#56993](https://github.com/ClickHouse/ClickHouse/pull/56993) ([Amos Bird](https://github.com/amosbird)). +* Fix dropping tables in test_create_or_drop_tables_during_backup [#57007](https://github.com/ClickHouse/ClickHouse/pull/57007) ([Vitaly Baranov](https://github.com/vitlibar)). +* Enable Analyzer in Stress and Fuzz tests [#57008](https://github.com/ClickHouse/ClickHouse/pull/57008) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Run CI for PRs with missing documentation [#57018](https://github.com/ClickHouse/ClickHouse/pull/57018) ([Michael Kolupaev](https://github.com/al13n321)). +* test_s3_engine_heavy_write_check_mem: turn test off [#57025](https://github.com/ClickHouse/ClickHouse/pull/57025) ([Sema Checherinda](https://github.com/CheSema)). +* NamedCollections: make exception message more informative. [#57031](https://github.com/ClickHouse/ClickHouse/pull/57031) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Avoid returning biggest resolution when fpr > 0.283 [#57034](https://github.com/ClickHouse/ClickHouse/pull/57034) ([Jordi Villar](https://github.com/jrdi)). +* Fix: suppress TSAN in RabbitMQ test [#57040](https://github.com/ClickHouse/ClickHouse/pull/57040) ([Igor Nikonov](https://github.com/devcrafter)). +* Small Keeper fixes [#57047](https://github.com/ClickHouse/ClickHouse/pull/57047) ([Antonio Andelic](https://github.com/antonio2368)). +* Parallel replicas: cleanup, narrow dependency [#57054](https://github.com/ClickHouse/ClickHouse/pull/57054) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix gRPC build on macOS [#57061](https://github.com/ClickHouse/ClickHouse/pull/57061) ([Robert Schulze](https://github.com/rschu1ze)). 
+* Better comment for ITransformingStep::transformPipeline [#57062](https://github.com/ClickHouse/ClickHouse/pull/57062) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `Duplicate set` for StorageSet with analyzer. [#57063](https://github.com/ClickHouse/ClickHouse/pull/57063) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Better metadata path [#57083](https://github.com/ClickHouse/ClickHouse/pull/57083) ([Nikolay Degterinsky](https://github.com/evillique)). +* Analyzer fuzzer 3 (aggregate_functions_null_for_empty for projections) [#57099](https://github.com/ClickHouse/ClickHouse/pull/57099) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update numbers.md [#57100](https://github.com/ClickHouse/ClickHouse/pull/57100) ([konruvikt](https://github.com/konruvikt)). +* Fix FunctionNode::toASTImpl [#57102](https://github.com/ClickHouse/ClickHouse/pull/57102) ([vdimir](https://github.com/vdimir)). +* Analyzer fuzzer 5 [#57103](https://github.com/ClickHouse/ClickHouse/pull/57103) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow HashedDictionary/FunctionsConversion as large TU [#57108](https://github.com/ClickHouse/ClickHouse/pull/57108) ([Azat Khuzhin](https://github.com/azat)). +* Disable checksums for builds with fuzzer [#57122](https://github.com/ClickHouse/ClickHouse/pull/57122) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Analyzer: Fix logical error in LogicalExpressionOptimizerVisitor [#57123](https://github.com/ClickHouse/ClickHouse/pull/57123) ([vdimir](https://github.com/vdimir)). +* Split HashedDictionary CU [#57124](https://github.com/ClickHouse/ClickHouse/pull/57124) ([Azat Khuzhin](https://github.com/azat)). +* Cancel executor in ~CreatingSetsTransform [#57125](https://github.com/ClickHouse/ClickHouse/pull/57125) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix system.*_log in artifacts on CI [#57128](https://github.com/ClickHouse/ClickHouse/pull/57128) ([Azat Khuzhin](https://github.com/azat)). +* Fix something in ReplicatedMergeTree [#57129](https://github.com/ClickHouse/ClickHouse/pull/57129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not symbolize traces for debug/sanitizer builds for sending to cloud [#57130](https://github.com/ClickHouse/ClickHouse/pull/57130) ([Azat Khuzhin](https://github.com/azat)). +* Resubmit 01600_parts_types_metrics test (possibly without flakiness) [#57131](https://github.com/ClickHouse/ClickHouse/pull/57131) ([Azat Khuzhin](https://github.com/azat)). +* Follow up to [#56541](https://github.com/ClickHouse/ClickHouse/issues/56541) [#57141](https://github.com/ClickHouse/ClickHouse/pull/57141) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to disable reorder-functions-after-sorting optimization [#57144](https://github.com/ClickHouse/ClickHouse/pull/57144) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix bad test `00002_log_and_exception_messages_formatting` [#57145](https://github.com/ClickHouse/ClickHouse/pull/57145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test test_replicated_merge_tree_encryption_codec/test.py::test_different_keys [#57146](https://github.com/ClickHouse/ClickHouse/pull/57146) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Remove partial results from build matrix for stress tests [#57150](https://github.com/ClickHouse/ClickHouse/pull/57150) ([Azat Khuzhin](https://github.com/azat)). +* Minor changes in test_check_table [#57154](https://github.com/ClickHouse/ClickHouse/pull/57154) ([vdimir](https://github.com/vdimir)). +* Fix 02903_rmt_retriable_merge_exception flakiness for replicated database [#57155](https://github.com/ClickHouse/ClickHouse/pull/57155) ([Azat Khuzhin](https://github.com/azat)). +* Mark select() as harmful function [#57156](https://github.com/ClickHouse/ClickHouse/pull/57156) ([Igor Nikonov](https://github.com/devcrafter)). +* Improve the cherry-pick PR description [#57167](https://github.com/ClickHouse/ClickHouse/pull/57167) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add debugging info for 01600_parts_types_metrics on failures [#57170](https://github.com/ClickHouse/ClickHouse/pull/57170) ([Azat Khuzhin](https://github.com/azat)). +* Tiny improvement security [#57171](https://github.com/ClickHouse/ClickHouse/pull/57171) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update blob_storage_log.md [#57187](https://github.com/ClickHouse/ClickHouse/pull/57187) ([vdimir](https://github.com/vdimir)). +* [RFC] Set log_comment to the file name while processing files in client [#57191](https://github.com/ClickHouse/ClickHouse/pull/57191) ([Azat Khuzhin](https://github.com/azat)). +* Add test for [#5323](https://github.com/ClickHouse/ClickHouse/issues/5323) [#57192](https://github.com/ClickHouse/ClickHouse/pull/57192) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer fuzzer 6 (arrayJoin) [#57198](https://github.com/ClickHouse/ClickHouse/pull/57198) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add test for [#47366](https://github.com/ClickHouse/ClickHouse/issues/47366) [#57200](https://github.com/ClickHouse/ClickHouse/pull/57200) ([Raúl Marín](https://github.com/Algunenano)). +* Add test for [#51321](https://github.com/ClickHouse/ClickHouse/issues/51321) [#57202](https://github.com/ClickHouse/ClickHouse/pull/57202) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible crash (in Rust) of fuzzy finder in client [#57204](https://github.com/ClickHouse/ClickHouse/pull/57204) ([Azat Khuzhin](https://github.com/azat)). +* fix zero-copy locks leaking [#57205](https://github.com/ClickHouse/ClickHouse/pull/57205) ([Sema Checherinda](https://github.com/CheSema)). +* Fix test_distributed_storage_configuration flakiness [#57206](https://github.com/ClickHouse/ClickHouse/pull/57206) ([Azat Khuzhin](https://github.com/azat)). +* Update Sentry [#57222](https://github.com/ClickHouse/ClickHouse/pull/57222) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.10.5.20-stable [#57223](https://github.com/ClickHouse/ClickHouse/pull/57223) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.9.6.20-stable [#57224](https://github.com/ClickHouse/ClickHouse/pull/57224) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.18.15-lts [#57225](https://github.com/ClickHouse/ClickHouse/pull/57225) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.8.8.20-lts [#57226](https://github.com/ClickHouse/ClickHouse/pull/57226) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Change cursor style for overwrite mode (INS) to blinking in client [#57227](https://github.com/ClickHouse/ClickHouse/pull/57227) ([Azat Khuzhin](https://github.com/azat)). +* Remove test `01280_ttl_where_group_by` [#57230](https://github.com/ClickHouse/ClickHouse/pull/57230) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix docs [#57234](https://github.com/ClickHouse/ClickHouse/pull/57234) ([Nikolay Degterinsky](https://github.com/evillique)). +* Remove addBatchSinglePlaceFromInterval [#57258](https://github.com/ClickHouse/ClickHouse/pull/57258) ([Raúl Marín](https://github.com/Algunenano)). +* Add some additional groups to CI [#57260](https://github.com/ClickHouse/ClickHouse/pull/57260) ([alesapin](https://github.com/alesapin)). +* Analyzer: fix result type of aggregate function with NULL [#57265](https://github.com/ClickHouse/ClickHouse/pull/57265) ([vdimir](https://github.com/vdimir)). +* Ignore memory exception in Keeper asio workers [#57268](https://github.com/ClickHouse/ClickHouse/pull/57268) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix code reports [#57301](https://github.com/ClickHouse/ClickHouse/pull/57301) ([Raúl Marín](https://github.com/Algunenano)). +* Follow up recommendations from [#57167](https://github.com/ClickHouse/ClickHouse/issues/57167) [#57302](https://github.com/ClickHouse/ClickHouse/pull/57302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add back flaky tests to analyzer_tech_debt.txt [#57307](https://github.com/ClickHouse/ClickHouse/pull/57307) ([Raúl Marín](https://github.com/Algunenano)). +* Lower level for annoying S3 log [#57312](https://github.com/ClickHouse/ClickHouse/pull/57312) ([Antonio Andelic](https://github.com/antonio2368)). +* Add regression test for skim (Rust) crash on pasting certain input [#57313](https://github.com/ClickHouse/ClickHouse/pull/57313) ([Azat Khuzhin](https://github.com/azat)). +* Remove unused Strings from MergeTreeData [#57318](https://github.com/ClickHouse/ClickHouse/pull/57318) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Address 02668_ulid_decoding flakiness [#57320](https://github.com/ClickHouse/ClickHouse/pull/57320) ([Raúl Marín](https://github.com/Algunenano)). +* DiskWeb fix [#57322](https://github.com/ClickHouse/ClickHouse/pull/57322) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update README.md [#57325](https://github.com/ClickHouse/ClickHouse/pull/57325) ([Tyler Hannan](https://github.com/tylerhannan)). +* Add information about new _size virtual column in file/s3/url/hdfs/azure table functions [#57328](https://github.com/ClickHouse/ClickHouse/pull/57328) ([Kruglov Pavel](https://github.com/Avogar)). +* Follow-up to [#56490](https://github.com/ClickHouse/ClickHouse/issues/56490): Fix build with `cmake -DENABLE_LIBRARIES=0` [#57330](https://github.com/ClickHouse/ClickHouse/pull/57330) ([Robert Schulze](https://github.com/rschu1ze)). +* Mark a setting obsolete [#57336](https://github.com/ClickHouse/ClickHouse/pull/57336) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Always renew ZK client in `WithRetries` [#57357](https://github.com/ClickHouse/ClickHouse/pull/57357) ([Antonio Andelic](https://github.com/antonio2368)). +* Shutdown disks after tables [#57358](https://github.com/ClickHouse/ClickHouse/pull/57358) ([Alexander Gololobov](https://github.com/davenger)). +* Update DDLTask.cpp [#57369](https://github.com/ClickHouse/ClickHouse/pull/57369) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* verbose exception messages for StorageFuzzJSON [#57372](https://github.com/ClickHouse/ClickHouse/pull/57372) ([Julia Kartseva](https://github.com/jkartseva)). +* Initialize only required disks in clickhouse-disks [#57387](https://github.com/ClickHouse/ClickHouse/pull/57387) ([Nikolay Degterinsky](https://github.com/evillique)). +* Allow wildcards in directories for partitioned write with File storage [#57391](https://github.com/ClickHouse/ClickHouse/pull/57391) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add tests for 43202 [#57404](https://github.com/ClickHouse/ClickHouse/pull/57404) ([Raúl Marín](https://github.com/Algunenano)). +* Consider whole check failure in bugfix validate check as an error [#57413](https://github.com/ClickHouse/ClickHouse/pull/57413) ([vdimir](https://github.com/vdimir)). +* Change type of s3_cache in test_encrypted_disk [#57416](https://github.com/ClickHouse/ClickHouse/pull/57416) ([vdimir](https://github.com/vdimir)). +* Add extra debug information on replication consistency errors [#57419](https://github.com/ClickHouse/ClickHouse/pull/57419) ([Raúl Marín](https://github.com/Algunenano)). +* Don't print server revision in client on connect [#57435](https://github.com/ClickHouse/ClickHouse/pull/57435) ([Nikita Taranov](https://github.com/nickitat)). +* Adding Sydney Meetup [#57457](https://github.com/ClickHouse/ClickHouse/pull/57457) ([Tyler Hannan](https://github.com/tylerhannan)). +* Fix adjusting log_comment in case of multiple files passed [#57464](https://github.com/ClickHouse/ClickHouse/pull/57464) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test 02697_stop_reading_on_first_cancel.sh [#57481](https://github.com/ClickHouse/ClickHouse/pull/57481) ([Raúl Marín](https://github.com/Algunenano)). +* Tiny refactoring around cache [#57482](https://github.com/ClickHouse/ClickHouse/pull/57482) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Decrease default value for `filesystem_prefetch_min_bytes_for_single_read_task` [#57489](https://github.com/ClickHouse/ClickHouse/pull/57489) ([Nikita Taranov](https://github.com/nickitat)). +* Remove bad test [#57494](https://github.com/ClickHouse/ClickHouse/pull/57494) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add changelog for 23.11 [#57517](https://github.com/ClickHouse/ClickHouse/pull/57517) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Simple cleanup in distributed (while dealing with parallel replicas) [#57518](https://github.com/ClickHouse/ClickHouse/pull/57518) ([Igor Nikonov](https://github.com/devcrafter)). +* Remove a feature. [#57521](https://github.com/ClickHouse/ClickHouse/pull/57521) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `S3Queue` is production ready [#57548](https://github.com/ClickHouse/ClickHouse/pull/57548) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Revert "Merge pull request [#56724](https://github.com/ClickHouse/ClickHouse/issues/56724) from canhld94/ch_replicated_column_mismatch" [#57576](https://github.com/ClickHouse/ClickHouse/pull/57576) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+ diff --git a/docs/changelogs/v23.11.2.11-stable.md b/docs/changelogs/v23.11.2.11-stable.md new file mode 100644 index 00000000000..490cc9a4590 --- /dev/null +++ b/docs/changelogs/v23.11.2.11-stable.md @@ -0,0 +1,22 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.11.2.11-stable (6e5411358c8) FIXME as compared to v23.11.1.2711-stable (05bc8ef1e02) + +#### Improvement +* Backported in [#57661](https://github.com/ClickHouse/ClickHouse/issues/57661): Handle sigabrt case when getting PostgreSQl table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot (Михаил Кот)](https://github.com/myrrc)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities. [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix SIGSEGV for aggregation of sparse columns with any() RESPECT NULL [#57710](https://github.com/ClickHouse/ClickHouse/pull/57710) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug window functions: revert [#39631](https://github.com/ClickHouse/ClickHouse/issues/39631) [#57766](https://github.com/ClickHouse/ClickHouse/pull/57766) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Pin alpine version of integration tests helper container [#57669](https://github.com/ClickHouse/ClickHouse/pull/57669) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.11.3.23-stable.md b/docs/changelogs/v23.11.3.23-stable.md new file mode 100644 index 00000000000..7fcc65beb54 --- /dev/null +++ b/docs/changelogs/v23.11.3.23-stable.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.11.3.23-stable (a14ab450b0e) FIXME as compared to v23.11.2.11-stable (6e5411358c8) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)). +* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix handling of unavailable replicas before first request happened [#57933](https://github.com/ClickHouse/ClickHouse/pull/57933) ([Nikita Taranov](https://github.com/nickitat)). +* Revert "Fix bug window functions: revert [#39631](https://github.com/ClickHouse/ClickHouse/issues/39631)" [#58031](https://github.com/ClickHouse/ClickHouse/pull/58031) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NO CL CATEGORY + +* Backported in [#57918](https://github.com/ClickHouse/ClickHouse/issues/57918):. [#57909](https://github.com/ClickHouse/ClickHouse/pull/57909) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Remove heavy rust stable toolchain [#57905](https://github.com/ClickHouse/ClickHouse/pull/57905) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix docker image for integration tests (fixes CI) [#57952](https://github.com/ClickHouse/ClickHouse/pull/57952) ([Azat Khuzhin](https://github.com/azat)). 
+* Always use `pread` for reading cache segments [#57970](https://github.com/ClickHouse/ClickHouse/pull/57970) ([Nikita Taranov](https://github.com/nickitat)). + diff --git a/docs/changelogs/v23.12.1.1368-stable.md b/docs/changelogs/v23.12.1.1368-stable.md new file mode 100644 index 00000000000..1a322ae9c0f --- /dev/null +++ b/docs/changelogs/v23.12.1.1368-stable.md @@ -0,0 +1,327 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.12.1.1368-stable (a2faa65b080) FIXME as compared to v23.11.1.2711-stable (05bc8ef1e02) + +#### Backward Incompatible Change +* Fix check for non-deterministic functions in TTL expressions. Previously, you could create a TTL expression with non-deterministic functions in some cases, which could lead to undefined behavior later. This fixes [#37250](https://github.com/ClickHouse/ClickHouse/issues/37250). Disallow TTL expressions that don't depend on any columns of a table by default. It can be allowed back by `SET allow_suspicious_ttl_expressions = 1` or `SET compatibility = '23.11'`. Closes [#37286](https://github.com/ClickHouse/ClickHouse/issues/37286). [#51858](https://github.com/ClickHouse/ClickHouse/pull/51858) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove function `arrayFold` because it has a bug. This closes [#57816](https://github.com/ClickHouse/ClickHouse/issues/57816). This closes [#57458](https://github.com/ClickHouse/ClickHouse/issues/57458). [#57836](https://github.com/ClickHouse/ClickHouse/pull/57836) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove the feature of `is_deleted` row in ReplacingMergeTree and the `CLEANUP` modifier for the OPTIMIZE query. This fixes [#57930](https://github.com/ClickHouse/ClickHouse/issues/57930). This closes [#54988](https://github.com/ClickHouse/ClickHouse/issues/54988). This closes [#54570](https://github.com/ClickHouse/ClickHouse/issues/54570). This closes [#50346](https://github.com/ClickHouse/ClickHouse/issues/50346). This closes [#47579](https://github.com/ClickHouse/ClickHouse/issues/47579). The feature has to be removed because it is not good. We have to remove it as quickly as possible, because there is no other option. [#57932](https://github.com/ClickHouse/ClickHouse/pull/57932) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58267](https://github.com/ClickHouse/ClickHouse/pull/58267) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### New Feature +* Allow disabling of HEAD request before GET request. [#54602](https://github.com/ClickHouse/ClickHouse/pull/54602) ([Fionera](https://github.com/fionera)). +* Add a HTTP endpoint for checking if Keeper is ready to accept traffic. [#55876](https://github.com/ClickHouse/ClickHouse/pull/55876) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Add 'union' mode for schema inference. In this mode the resulting table schema is the union of all files schemas (so schema is inferred from each file). The mode of schema inference is controlled by a setting `schema_inference_mode` with 2 possible values - `default` and `union`. Closes [#55428](https://github.com/ClickHouse/ClickHouse/issues/55428). [#55892](https://github.com/ClickHouse/ClickHouse/pull/55892) ([Kruglov Pavel](https://github.com/Avogar)). 
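A brief sketch of the new `schema_inference_mode` setting described in the last entry above; the file names and columns are hypothetical and only illustrate the behaviour the entry claims:

```sql
-- Suppose data_1.parquet contains column (a) and data_2.parquet contains column (b).
-- With the default mode the schema is inferred from the first file only;
-- with 'union' the resulting schema is the union of both files' schemas: (a, b).
SET schema_inference_mode = 'union';
DESCRIBE TABLE file('data_*.parquet');
```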
+* Add new setting `input_format_csv_try_infer_numbers_from_strings` that allows inferring numbers from strings in CSV format. Closes [#56455](https://github.com/ClickHouse/ClickHouse/issues/56455). [#56859](https://github.com/ClickHouse/ClickHouse/pull/56859) ([Kruglov Pavel](https://github.com/Avogar)).
+* Refreshable materialized views. [#56946](https://github.com/ClickHouse/ClickHouse/pull/56946) ([Michael Kolupaev](https://github.com/al13n321)).
+* Add more warnings on the number of databases and tables. [#57375](https://github.com/ClickHouse/ClickHouse/pull/57375) ([凌涛](https://github.com/lingtaolf)).
+* Added a new mutation command `ALTER TABLE <table> APPLY DELETED MASK`, which allows enforcing the application of the mask written by lightweight delete and removing rows marked as deleted from disk. [#57433](https://github.com/ClickHouse/ClickHouse/pull/57433) ([Anton Popov](https://github.com/CurtizJ)).
+* Added a new SQL function `sqid` to generate Sqids (https://sqids.org/), example: `SELECT sqid(125, 126)`. [#57512](https://github.com/ClickHouse/ClickHouse/pull/57512) ([Robert Schulze](https://github.com/rschu1ze)).
+* Dictionary with `HASHED_ARRAY` (and `COMPLEX_KEY_HASHED_ARRAY`) layout supports `SHARDS` similarly to `HASHED`. [#57544](https://github.com/ClickHouse/ClickHouse/pull/57544) ([vdimir](https://github.com/vdimir)).
+* Add asynchronous metrics for total primary key bytes and total allocated primary key bytes in memory. [#57551](https://github.com/ClickHouse/ClickHouse/pull/57551) ([Bharat Nallan](https://github.com/bharatnc)).
+* Table system.dropped_tables_parts contains parts of system.dropped_tables tables (dropped but not yet removed tables). [#57555](https://github.com/ClickHouse/ClickHouse/pull/57555) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Add `FORMAT_BYTES` as an alias for `formatReadableSize`. [#57592](https://github.com/ClickHouse/ClickHouse/pull/57592) ([Bharat Nallan](https://github.com/bharatnc)).
+* Add SHA512_256 function. [#57645](https://github.com/ClickHouse/ClickHouse/pull/57645) ([Bharat Nallan](https://github.com/bharatnc)).
+* Allow passing optional SESSION_TOKEN to `s3` table function. [#57850](https://github.com/ClickHouse/ClickHouse/pull/57850) ([Shani Elharrar](https://github.com/shanielh)).
+* Clause `ORDER BY` now supports specifying `ALL`, meaning that ClickHouse sorts by all columns in the `SELECT` clause. Example: `SELECT col1, col2 FROM tab WHERE [...] ORDER BY ALL`. [#57875](https://github.com/ClickHouse/ClickHouse/pull/57875) ([zhongyuankai](https://github.com/zhongyuankai)).
+* Added functions for punycode encoding/decoding: `punycodeEncode()` and `punycodeDecode()`. [#57969](https://github.com/ClickHouse/ClickHouse/pull/57969) ([Robert Schulze](https://github.com/rschu1ze)).
+* This PR implements `PASTE JOIN`, which allows users to join tables without an `ON` clause. Example: `SELECT * FROM (SELECT number AS a FROM numbers(2)) AS t1 PASTE JOIN (SELECT number AS a FROM numbers(2) ORDER BY a DESC) AS t2`. [#57995](https://github.com/ClickHouse/ClickHouse/pull/57995) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* A handler `/binary` opens a visual viewer of symbols inside the ClickHouse binary. [#58211](https://github.com/ClickHouse/ClickHouse/pull/58211) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Performance Improvement
+* Copying between S3 disks now uses S3 server-side copy instead of copying through the buffer. Improves `BACKUP/RESTORE` operations and the `clickhouse-disks copy` command. [#56744](https://github.com/ClickHouse/ClickHouse/pull/56744) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* HashJoin respects the setting `max_joined_block_size_rows` and does not produce large blocks for `ALL JOIN`. [#56996](https://github.com/ClickHouse/ClickHouse/pull/56996) ([vdimir](https://github.com/vdimir)).
+* Release memory for aggregation earlier. This may avoid unnecessary external aggregation. [#57691](https://github.com/ClickHouse/ClickHouse/pull/57691) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Improve performance of string serialization. [#57717](https://github.com/ClickHouse/ClickHouse/pull/57717) ([Maksim Kita](https://github.com/kitaisreal)).
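To make the `ORDER BY ALL` and `PASTE JOIN` entries above more concrete, here is a small sketch; the queries are illustrative only and run against the built-in `numbers()` table function:

```sql
-- ORDER BY ALL sorts by every column in the SELECT clause.
SELECT number % 3 AS grp, number AS n
FROM numbers(6)
ORDER BY ALL;          -- equivalent to ORDER BY grp, n

-- PASTE JOIN glues two result sets together row by row, without an ON clause.
SELECT *
FROM (SELECT number AS a FROM numbers(2)) AS t1
PASTE JOIN (SELECT number AS a FROM numbers(2) ORDER BY a DESC) AS t2;
```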
[#57717](https://github.com/ClickHouse/ClickHouse/pull/57717) ([Maksim Kita](https://github.com/kitaisreal)). +* Support trivial count optimization for `Merge`-engine tables. [#57867](https://github.com/ClickHouse/ClickHouse/pull/57867) ([skyoct](https://github.com/skyoct)). +* Optimized aggregation in some cases. [#57872](https://github.com/ClickHouse/ClickHouse/pull/57872) ([Anton Popov](https://github.com/CurtizJ)). +* The `hasAny()` function can now take advantage of the full-text skipping indices. [#57878](https://github.com/ClickHouse/ClickHouse/pull/57878) ([Jpnock](https://github.com/Jpnock)). +* Function `if(cond, then, else)` (and its alias `cond ? then : else`) was optimized to use branch-free evaluation. [#57885](https://github.com/ClickHouse/ClickHouse/pull/57885) ([zhanglistar](https://github.com/zhanglistar)). +* Extract non-intersecting parts ranges from MergeTree tables during FINAL processing. This allows the additional FINAL logic to be skipped for these non-intersecting parts ranges. When the amount of duplicate values with the same primary key is low, performance is almost the same as without FINAL. Also improves reading performance for MergeTree FINAL when the `do_not_merge_across_partitions_select_final` setting is set. [#58120](https://github.com/ClickHouse/ClickHouse/pull/58120) ([Maksim Kita](https://github.com/kitaisreal)). +* MergeTree now automatically derives the `do_not_merge_across_partitions_select_final` setting if the partition key expression contains only columns from the primary key expression. [#58218](https://github.com/ClickHouse/ClickHouse/pull/58218) ([Maksim Kita](https://github.com/kitaisreal)). +* Speed up MIN and MAX for native types. [#58231](https://github.com/ClickHouse/ClickHouse/pull/58231) ([Raúl Marín](https://github.com/Algunenano)). + +#### Improvement +* Make inserts into distributed tables handle updated cluster configuration properly. Previously, when the list of cluster nodes was dynamically updated, the directory monitor of the Distributed table could not detect new nodes without being recreated. [#42826](https://github.com/ClickHouse/ClickHouse/pull/42826) ([zhongyuankai](https://github.com/zhongyuankai)). +* Replace `--no-system-tables` with loading virtual tables of the system database lazily. [#55271](https://github.com/ClickHouse/ClickHouse/pull/55271) ([Azat Khuzhin](https://github.com/azat)). +* clickhouse-test now prints the case serial number, current time, and case name for each test case. [#55710](https://github.com/ClickHouse/ClickHouse/pull/55710) ([guoxiaolong](https://github.com/guoxiaolongzte)). +* Do not allow creating a replicated table with inconsistent merge parameters. [#56833](https://github.com/ClickHouse/ClickHouse/pull/56833) ([Duc Canh Le](https://github.com/canhld94)). +* Implement SLRU cache policy for filesystem cache. [#57076](https://github.com/ClickHouse/ClickHouse/pull/57076) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Show uncompressed size in `system.tables`, obtained from data parts' checksums [#56618](https://github.com/ClickHouse/ClickHouse/issues/56618). [#57186](https://github.com/ClickHouse/ClickHouse/pull/57186) ([Chen Lixiang](https://github.com/chenlx0)). +* Add `skip_unavailable_shards` as a setting for `Distributed` tables that is similar to the corresponding query-level setting. Closes [#43666](https://github.com/ClickHouse/ClickHouse/issues/43666). [#57218](https://github.com/ClickHouse/ClickHouse/pull/57218) ([Gagan Goel](https://github.com/tntnatbry)). 
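+
+As a minimal sketch of the table-level `skip_unavailable_shards` setting from the previous entry (the cluster, database and table names here are hypothetical):
+
+```sql
+-- With the setting enabled, SELECTs over this Distributed table silently skip
+-- shards that are currently unreachable instead of failing the whole query.
+CREATE TABLE hits_distributed AS hits_local
+ENGINE = Distributed('my_cluster', 'default', 'hits_local')
+SETTINGS skip_unavailable_shards = 1;
+```
+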
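+
+The `ALTER TABLE ... APPLY DELETED MASK` command from the New Feature section above can be illustrated with a short, hypothetical example (the table name and filter are made up):
+
+```sql
+-- A lightweight delete only writes a mask of deleted rows;
+-- APPLY DELETED MASK then physically removes the masked rows from disk.
+DELETE FROM events WHERE event_date < '2023-01-01';
+ALTER TABLE events APPLY DELETED MASK;
+```
+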
+* Function `substring()` (aliases: `substr`, `mid`) can now be used with `Enum` types. Previously, the first function argument had to be a value of type `String` or `FixedString`. This improves compatibility with third-party tools such as Tableau via the MySQL interface. [#57277](https://github.com/ClickHouse/ClickHouse/pull/57277) ([Serge Klochkov](https://github.com/slvrtrn)). +* Better hints when a table doesn't exist. [#57342](https://github.com/ClickHouse/ClickHouse/pull/57342) ([Bharat Nallan](https://github.com/bharatnc)). +* Allow to overwrite `max_partition_size_to_drop` and `max_table_size_to_drop` server settings at query time. [#57452](https://github.com/ClickHouse/ClickHouse/pull/57452) ([Jordi Villar](https://github.com/jrdi)). +* Add support for the read-only flag when connecting to the ZooKeeper server (fixes [#53749](https://github.com/ClickHouse/ClickHouse/issues/53749)). [#57479](https://github.com/ClickHouse/ClickHouse/pull/57479) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Fix possible distributed sends getting stuck due to "No such file or directory" (while recovering a batch from disk). Fix possible issues with `error_count` from `system.distribution_queue` (in case of `distributed_directory_monitor_max_sleep_time_ms` > 5 min). Introduce a profile event to track async INSERT failures: `DistributedAsyncInsertionFailures`. [#57480](https://github.com/ClickHouse/ClickHouse/pull/57480) ([Azat Khuzhin](https://github.com/azat)). +* The limit for the number of connections per endpoint for background fetches was raised from `15` to the value of the `background_fetches_pool_size` setting. The MergeTree-level setting `replicated_max_parallel_fetches_for_host` became obsolete. The MergeTree-level settings `replicated_fetches_http_connection_timeout`, `replicated_fetches_http_send_timeout` and `replicated_fetches_http_receive_timeout` were moved to the server level. The setting `keep_alive_timeout` was added to the list of server-level settings. [#57523](https://github.com/ClickHouse/ClickHouse/pull/57523) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* It is now possible to refer to an ALIAS column in index (non-primary-key) definitions (issue [#55650](https://github.com/ClickHouse/ClickHouse/issues/55650)). Example: `CREATE TABLE tab(col UInt32, col_alias ALIAS col + 1, INDEX idx (col_alias) TYPE minmax) ENGINE = MergeTree ORDER BY col;`. [#57546](https://github.com/ClickHouse/ClickHouse/pull/57546) ([Robert Schulze](https://github.com/rschu1ze)). +* Function `format()` now supports arbitrary argument types (instead of only `String` and `FixedString` arguments). This makes it possible to compute, for example, `SELECT format('The {0} to all questions is {1}', 'answer', 42)`. [#57549](https://github.com/ClickHouse/ClickHouse/pull/57549) ([Robert Schulze](https://github.com/rschu1ze)). +* Support PostgreSQL generated columns and default column values in `MaterializedPostgreSQL`. Closes [#40449](https://github.com/ClickHouse/ClickHouse/issues/40449). [#57568](https://github.com/ClickHouse/ClickHouse/pull/57568) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to apply some filesystem cache config settings changes without a server restart. [#57578](https://github.com/ClickHouse/ClickHouse/pull/57578) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Handle the SIGABRT case when getting a PostgreSQL table structure with an empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot (Михаил Кот)](https://github.com/myrrc)). 
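+
+A small illustration of the `substring()`-on-`Enum` improvement described above (the Enum definition and values are arbitrary):
+
+```sql
+-- substring()/substr()/mid() now operate on the Enum's string name.
+SELECT substring(CAST('hello' AS Enum('hello' = 1, 'world' = 2)), 2, 3); -- 'ell'
+```
+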
+* The first argument of the `date_trunc()` function is now case-insensitive. Both variants are supported: `SELECT date_trunc('day', now())` and `SELECT date_trunc('DAY', now())`. [#57624](https://github.com/ClickHouse/ClickHouse/pull/57624) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Expose the total number of errors that occurred since the last server start as a `ClickHouseErrorMetric_ALL` metric. [#57627](https://github.com/ClickHouse/ClickHouse/pull/57627) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow config nodes with `from_env`/`from_zk` attributes and a non-empty element when `replace=1` is used. [#57628](https://github.com/ClickHouse/ClickHouse/pull/57628) ([Azat Khuzhin](https://github.com/azat)). +* Generate malformed output that cannot be parsed as JSON. [#57646](https://github.com/ClickHouse/ClickHouse/pull/57646) ([Julia Kartseva](https://github.com/jkartseva)). +* Consider lightweight deleted rows when selecting parts to merge if enabled. [#57648](https://github.com/ClickHouse/ClickHouse/pull/57648) ([Zhuo Qiu](https://github.com/jewelzqiu)). +* Make querying `system.filesystem_cache` not memory-intensive. [#57687](https://github.com/ClickHouse/ClickHouse/pull/57687) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow IPv6 to UInt128 conversion and binary arithmetic. [#57707](https://github.com/ClickHouse/ClickHouse/pull/57707) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support negative positional arguments. Closes [#57736](https://github.com/ClickHouse/ClickHouse/issues/57736). [#57741](https://github.com/ClickHouse/ClickHouse/pull/57741) ([flynn](https://github.com/ucasfl)). +* Add a setting for the asynchronous inserts deduplication cache that controls how long to wait for a cache update. Deprecate the setting `async_block_ids_cache_min_update_interval_ms`. Now the cache is updated only in case of conflicts. [#57743](https://github.com/ClickHouse/ClickHouse/pull/57743) ([alesapin](https://github.com/alesapin)). +* The `sleep()` function can now be cancelled with `KILL QUERY`. [#57746](https://github.com/ClickHouse/ClickHouse/pull/57746) ([Vitaly Baranov](https://github.com/vitlibar)). +* Slightly better inference of unnamed tuples in JSON formats. [#57751](https://github.com/ClickHouse/ClickHouse/pull/57751) ([Kruglov Pavel](https://github.com/Avogar)). +* Refactor UserDefinedSQL* classes to make it possible to add SQL UDF storages which are different from ZooKeeper and Disk. [#57752](https://github.com/ClickHouse/ClickHouse/pull/57752) ([Natasha Chizhonkova](https://github.com/chizhonkova)). +* Forbid `CREATE TABLE ... AS SELECT` queries for Replicated table engines in a Replicated database because they are broken. Reference [#35408](https://github.com/ClickHouse/ClickHouse/issues/35408). [#57796](https://github.com/ClickHouse/ClickHouse/pull/57796) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix and improve the query transformation for external databases: all compatible predicates are now obtained recursively. [#57888](https://github.com/ClickHouse/ClickHouse/pull/57888) ([flynn](https://github.com/ucasfl)). +* Support dynamic reloading of filesystem cache size. Closes [#57866](https://github.com/ClickHouse/ClickHouse/issues/57866). [#57897](https://github.com/ClickHouse/ClickHouse/pull/57897) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix system.stack_trace for threads with blocked SIGRTMIN. [#57907](https://github.com/ClickHouse/ClickHouse/pull/57907) ([Azat Khuzhin](https://github.com/azat)). 
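+
+A quick, illustrative check of the case-insensitive `date_trunc()` first argument mentioned above:
+
+```sql
+-- Both spellings of the unit are now accepted and produce the same result.
+SELECT date_trunc('DAY', now()) = date_trunc('day', now()); -- 1
+```
+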
+* Added a new setting `readonly` which can be used to specify that an s3 disk is read-only. It can be useful for creating a table on a read-only `s3_plain` type disk. [#57977](https://github.com/ClickHouse/ClickHouse/pull/57977) ([Pengyuan Bian](https://github.com/bianpengyuan)). +* Support Keeper failures in the quorum check. [#57986](https://github.com/ClickHouse/ClickHouse/pull/57986) ([Raúl Marín](https://github.com/Algunenano)). +* Add max/peak RSS (`MemoryResidentMax`) into `system.asynchronous_metrics`. [#58095](https://github.com/ClickHouse/ClickHouse/pull/58095) ([Azat Khuzhin](https://github.com/azat)). +* Fix `system.stack_trace` for threads with blocked SIGRTMIN (and also send the signal to threads only if it is not blocked, to avoid waiting `storage_system_stack_trace_pipe_read_timeout_ms` when it does not make any sense). [#58136](https://github.com/ClickHouse/ClickHouse/pull/58136) ([Azat Khuzhin](https://github.com/azat)). +* Allow using s3 links (`https://` and `s3://`) without specifying the region if it is not the default one. Also detect the correct region if the user specified a wrong one. [#58148](https://github.com/ClickHouse/ClickHouse/pull/58148) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* `clickhouse-format --obfuscate` will know about Settings, MergeTreeSettings, and time zones and keep their names unchanged. [#58179](https://github.com/ClickHouse/ClickHouse/pull/58179) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added an explicit `finalize()` function in `ZipArchiveWriter`. Simplify overly complicated code in `ZipArchiveWriter`. This fixes [#58074](https://github.com/ClickHouse/ClickHouse/issues/58074). [#58202](https://github.com/ClickHouse/ClickHouse/pull/58202) ([Vitaly Baranov](https://github.com/vitlibar)). +* The primary key analysis in MergeTree tables will now be applied to predicates that include the virtual column `_part_offset` (optionally with `_part`). This feature can serve as a poor man's secondary index. [#58224](https://github.com/ClickHouse/ClickHouse/pull/58224) ([Amos Bird](https://github.com/amosbird)). +* Make caches with the same path use the same cache objects. This behaviour existed before, but was broken in https://github.com/ClickHouse/ClickHouse/pull/48805 (in 23.4). If such caches with the same path have a different set of cache settings, an exception will be thrown stating that this is not allowed. [#58264](https://github.com/ClickHouse/ClickHouse/pull/58264) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Build/Testing/Packaging Improvement +* Allow usage of Azure-related table engines/functions on macOS. [#51866](https://github.com/ClickHouse/ClickHouse/pull/51866) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* ClickHouse Fast Test now uses Musl instead of GLibc. [#57711](https://github.com/ClickHouse/ClickHouse/pull/57711) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Run ClickBench for every commit. This closes [#57708](https://github.com/ClickHouse/ClickHouse/issues/57708). [#57712](https://github.com/ClickHouse/ClickHouse/pull/57712) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fixed a sorting order breakage in TTL GROUP BY [#49103](https://github.com/ClickHouse/ClickHouse/pull/49103) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
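+
+A hypothetical sketch of the `_part_offset` primary-key-analysis entry in the Improvement section above (the part name and offsets are made up):
+
+```sql
+-- Predicates on _part_offset (optionally combined with _part) are now taken into
+-- account by the primary key analysis, acting as a poor man's secondary index.
+SELECT *
+FROM events
+WHERE _part = 'all_1_1_0' AND _part_offset BETWEEN 1000 AND 2000;
+```
+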
+* fix: split lttb bucket strategy, first bucket and last bucket should only contain single point [#57003](https://github.com/ClickHouse/ClickHouse/pull/57003) ([FFish](https://github.com/wxybear)). +* Fix possible deadlock in Template format during sync after error [#57004](https://github.com/ClickHouse/ClickHouse/pull/57004) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix early stop while parsing file with skipping lots of errors [#57006](https://github.com/ClickHouse/ClickHouse/pull/57006) ([Kruglov Pavel](https://github.com/Avogar)). +* Prevent dictionary's ACL bypass via dictionary() table function [#57362](https://github.com/ClickHouse/ClickHouse/pull/57362) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fix another case of non-ready set. [#57423](https://github.com/ClickHouse/ClickHouse/pull/57423) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix several issues regarding PostgreSQL `array_ndims` usage. [#57436](https://github.com/ClickHouse/ClickHouse/pull/57436) ([Ryan Jacobs](https://github.com/ryanmjacobs)). +* Fix RWLock inconsistency after write lock timeout [#57454](https://github.com/ClickHouse/ClickHouse/pull/57454) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix: don't exclude ephemeral column when building pushing to view chain [#57461](https://github.com/ClickHouse/ClickHouse/pull/57461) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* MaterializedPostgreSQL: fix issue [#41922](https://github.com/ClickHouse/ClickHouse/issues/41922), add test for [#41923](https://github.com/ClickHouse/ClickHouse/issues/41923) [#57515](https://github.com/ClickHouse/ClickHouse/pull/57515) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities. [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix crash in clickhouse-local [#57553](https://github.com/ClickHouse/ClickHouse/pull/57553) ([Nikolay Degterinsky](https://github.com/evillique)). +* Materialize block in HashJoin for Type::EMPTY [#57564](https://github.com/ClickHouse/ClickHouse/pull/57564) ([vdimir](https://github.com/vdimir)). +* Fix possible segfault in PostgreSQLSource [#57567](https://github.com/ClickHouse/ClickHouse/pull/57567) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix type correction in HashJoin for nested low cardinality [#57614](https://github.com/ClickHouse/ClickHouse/pull/57614) ([vdimir](https://github.com/vdimir)). +* Avoid hangs of system.stack_trace by correctly prohibit parallel read from it [#57641](https://github.com/ClickHouse/ClickHouse/pull/57641) ([Azat Khuzhin](https://github.com/azat)). +* Fix SIGSEGV for aggregation of sparse columns with any() RESPECT NULL [#57710](https://github.com/ClickHouse/ClickHouse/pull/57710) ([Azat Khuzhin](https://github.com/azat)). +* Fix unary operators parsing [#57713](https://github.com/ClickHouse/ClickHouse/pull/57713) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix RWLock inconsistency after write lock timeout (again) [#57733](https://github.com/ClickHouse/ClickHouse/pull/57733) ([Vitaly Baranov](https://github.com/vitlibar)). +* Table engine MaterializedPostgreSQL fix dependency loading [#57754](https://github.com/ClickHouse/ClickHouse/pull/57754) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Fix retries for disconnected nodes for BACKUP/RESTORE ON CLUSTER [#57764](https://github.com/ClickHouse/ClickHouse/pull/57764) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix bug window functions: revert [#39631](https://github.com/ClickHouse/ClickHouse/issues/39631) [#57766](https://github.com/ClickHouse/ClickHouse/pull/57766) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix result of external aggregation in case of partially materialized projection [#57790](https://github.com/ClickHouse/ClickHouse/pull/57790) ([Anton Popov](https://github.com/CurtizJ)). +* Fix merge in aggregation functions with `*Map` combinator [#57795](https://github.com/ClickHouse/ClickHouse/pull/57795) ([Anton Popov](https://github.com/CurtizJ)). +* Disable system.kafka_consumers by default (due to possible live memory leak) [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)). +* Fix low-cardinality keys support in MergeJoin [#57827](https://github.com/ClickHouse/ClickHouse/pull/57827) ([vdimir](https://github.com/vdimir)). +* Create consumers for Kafka tables on fly (but keep them for some period since last used) [#57829](https://github.com/ClickHouse/ClickHouse/pull/57829) ([Azat Khuzhin](https://github.com/azat)). +* InterpreterCreateQuery sample block fix [#57855](https://github.com/ClickHouse/ClickHouse/pull/57855) ([Maksim Kita](https://github.com/kitaisreal)). +* bugfix: addresses_expr ignored for psql named collections [#57874](https://github.com/ClickHouse/ClickHouse/pull/57874) ([joelynch](https://github.com/joelynch)). +* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)). +* Resurrect `arrayFold()` [#57879](https://github.com/ClickHouse/ClickHouse/pull/57879) ([Robert Schulze](https://github.com/rschu1ze)). +* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix handling of unavailable replicas before first request happened [#57933](https://github.com/ClickHouse/ClickHouse/pull/57933) ([Nikita Taranov](https://github.com/nickitat)). +* Fix literal alias misclassification [#57988](https://github.com/ClickHouse/ClickHouse/pull/57988) ([Chen768959](https://github.com/Chen768959)). +* Revert "Fix bug window functions: revert [#39631](https://github.com/ClickHouse/ClickHouse/issues/39631)" [#58031](https://github.com/ClickHouse/ClickHouse/pull/58031) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix Integer overflow in Poco::UTF32Encoding [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)). +* Fix parallel replicas in presence of a scalar subquery with a big integer value [#58118](https://github.com/ClickHouse/ClickHouse/pull/58118) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `accurateCastOrNull` for out-of-range DateTime [#58139](https://github.com/ClickHouse/ClickHouse/pull/58139) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix possible PARAMETER_OUT_OF_BOUND error during subcolumns reading from wide part in MergeTree [#58175](https://github.com/ClickHouse/ClickHouse/pull/58175) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Remove parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix CREATE VIEW hang [#58220](https://github.com/ClickHouse/ClickHouse/pull/58220) ([Tao Wang](https://github.com/wangtZJU)). +* Fix parallel parsing for JSONCompactEachRow [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Revert "Update Sentry""'. [#57694](https://github.com/ClickHouse/ClickHouse/pull/57694) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Fix RWLock inconsistency after write lock timeout"'. [#57730](https://github.com/ClickHouse/ClickHouse/pull/57730) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "improve CI with digest for docker, build and test jobs"'. [#57903](https://github.com/ClickHouse/ClickHouse/pull/57903) ([Max K.](https://github.com/mkaynov)). +* NO CL ENTRY: 'Reapply "improve CI with digest for docker, build and test jobs"'. [#57904](https://github.com/ClickHouse/ClickHouse/pull/57904) ([Max K.](https://github.com/mkaynov)). +* NO CL ENTRY: 'Revert "Merge pull request [#56573](https://github.com/ClickHouse/ClickHouse/issues/56573) from mkmkme/mkmkme/reload-config"'. [#57909](https://github.com/ClickHouse/ClickHouse/pull/57909) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add system.dropped_tables_parts table"'. [#58022](https://github.com/ClickHouse/ClickHouse/pull/58022) ([Antonio Andelic](https://github.com/antonio2368)). +* NO CL ENTRY: 'Revert "Consider lightweight deleted rows when selecting parts to merge"'. [#58097](https://github.com/ClickHouse/ClickHouse/pull/58097) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Fix leftover processes/hangs in tests"'. [#58207](https://github.com/ClickHouse/ClickHouse/pull/58207) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Create consumers for Kafka tables on fly (but keep them for some period since last used)"'. [#58272](https://github.com/ClickHouse/ClickHouse/pull/58272) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Implement punycode encoding/decoding"'. [#58277](https://github.com/ClickHouse/ClickHouse/pull/58277) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Randomize more settings [#39663](https://github.com/ClickHouse/ClickHouse/pull/39663) ([Anton Popov](https://github.com/CurtizJ)). +* Add more tests for `compile_expressions` [#51113](https://github.com/ClickHouse/ClickHouse/pull/51113) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* [RFC] Correctly wait background threads [#52717](https://github.com/ClickHouse/ClickHouse/pull/52717) ([Azat Khuzhin](https://github.com/azat)). +* improve CI with digest for docker, build and test jobs [#56317](https://github.com/ClickHouse/ClickHouse/pull/56317) ([Max K.](https://github.com/mkaynov)). +* Prepare the introduction of more keeper faults [#56917](https://github.com/ClickHouse/ClickHouse/pull/56917) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer: Fix assert in tryReplaceAndEqualsChainsWithConstant [#57139](https://github.com/ClickHouse/ClickHouse/pull/57139) ([vdimir](https://github.com/vdimir)). 
+* Check what will happen if we build ClickHouse with Musl [#57180](https://github.com/ClickHouse/ClickHouse/pull/57180) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* support memory soft limit for keeper [#57271](https://github.com/ClickHouse/ClickHouse/pull/57271) ([Han Fei](https://github.com/hanfei1991)). +* Randomize disabled optimizations in CI [#57315](https://github.com/ClickHouse/ClickHouse/pull/57315) ([Raúl Marín](https://github.com/Algunenano)). +* Don't throw if noop when dropping database replica in batch [#57337](https://github.com/ClickHouse/ClickHouse/pull/57337) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Better JSON -> JSONEachRow fallback without catching exceptions [#57364](https://github.com/ClickHouse/ClickHouse/pull/57364) ([Kruglov Pavel](https://github.com/Avogar)). +* Add tests for [#48496](https://github.com/ClickHouse/ClickHouse/issues/48496) [#57414](https://github.com/ClickHouse/ClickHouse/pull/57414) ([Raúl Marín](https://github.com/Algunenano)). +* Add profile event for cache lookup in `ThreadPoolRemoteFSReader` [#57437](https://github.com/ClickHouse/ClickHouse/pull/57437) ([Nikita Taranov](https://github.com/nickitat)). +* Remove select() usage [#57467](https://github.com/ClickHouse/ClickHouse/pull/57467) ([Igor Nikonov](https://github.com/devcrafter)). +* Parallel replicas: friendly settings [#57542](https://github.com/ClickHouse/ClickHouse/pull/57542) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix formatting string prompt error [#57569](https://github.com/ClickHouse/ClickHouse/pull/57569) ([skyoct](https://github.com/skyoct)). +* Tune CI scale up/down multipliers [#57572](https://github.com/ClickHouse/ClickHouse/pull/57572) ([Max K.](https://github.com/mkaynov)). +* Revert "Revert "Implemented series period detect method using pocketfft lib"" [#57574](https://github.com/ClickHouse/ClickHouse/pull/57574) ([Bhavna Jindal](https://github.com/bhavnajindal)). +* Correctly handle errors during opening query in editor in client [#57587](https://github.com/ClickHouse/ClickHouse/pull/57587) ([Azat Khuzhin](https://github.com/azat)). +* Add a test for [#55251](https://github.com/ClickHouse/ClickHouse/issues/55251) [#57588](https://github.com/ClickHouse/ClickHouse/pull/57588) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add a test for [#48039](https://github.com/ClickHouse/ClickHouse/issues/48039) [#57593](https://github.com/ClickHouse/ClickHouse/pull/57593) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update CHANGELOG.md [#57594](https://github.com/ClickHouse/ClickHouse/pull/57594) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version after release [#57595](https://github.com/ClickHouse/ClickHouse/pull/57595) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.11.1.2711-stable [#57597](https://github.com/ClickHouse/ClickHouse/pull/57597) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Identify failed jobs in lambda and mark as steps=0 [#57600](https://github.com/ClickHouse/ClickHouse/pull/57600) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix flaky test: distinct in order with analyzer [#57606](https://github.com/ClickHouse/ClickHouse/pull/57606) ([Igor Nikonov](https://github.com/devcrafter)). +* CHJIT add assembly printer [#57610](https://github.com/ClickHouse/ClickHouse/pull/57610) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Fix parsing virtual hosted S3 URI in clickhouse_backupview script [#57612](https://github.com/ClickHouse/ClickHouse/pull/57612) ([Daniel Pozo Escalona](https://github.com/danipozo)). +* Fix docs for `fileCluster` [#57613](https://github.com/ClickHouse/ClickHouse/pull/57613) ([Andrey Zvonov](https://github.com/zvonand)). +* Analyzer: Fix logical error in MultiIfToIfPass [#57622](https://github.com/ClickHouse/ClickHouse/pull/57622) ([vdimir](https://github.com/vdimir)). +* Throw more clear exception [#57626](https://github.com/ClickHouse/ClickHouse/pull/57626) ([alesapin](https://github.com/alesapin)). +* Fix "logs and exception messages formatting", part 1 [#57630](https://github.com/ClickHouse/ClickHouse/pull/57630) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix "logs and exception messages formatting", part 2 [#57632](https://github.com/ClickHouse/ClickHouse/pull/57632) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix "logs and exception messages formatting", part 3 [#57633](https://github.com/ClickHouse/ClickHouse/pull/57633) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix "logs and exception messages formatting", part 4 [#57634](https://github.com/ClickHouse/ClickHouse/pull/57634) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove bad test (1) [#57636](https://github.com/ClickHouse/ClickHouse/pull/57636) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove bad test (2) [#57637](https://github.com/ClickHouse/ClickHouse/pull/57637) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* ClickHouse Cloud promotion [#57638](https://github.com/ClickHouse/ClickHouse/pull/57638) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Remove bad test (3) [#57639](https://github.com/ClickHouse/ClickHouse/pull/57639) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove bad test (4) [#57640](https://github.com/ClickHouse/ClickHouse/pull/57640) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Random changes in random files [#57642](https://github.com/ClickHouse/ClickHouse/pull/57642) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Merge half of [#51113](https://github.com/ClickHouse/ClickHouse/issues/51113) [#57643](https://github.com/ClickHouse/ClickHouse/pull/57643) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Analyzer: Fix JOIN ON true with join_use_nulls [#57662](https://github.com/ClickHouse/ClickHouse/pull/57662) ([vdimir](https://github.com/vdimir)). +* Pin alpine version of integration tests helper container [#57669](https://github.com/ClickHouse/ClickHouse/pull/57669) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add support for system.stack_trace filtering optimizations for analyzer [#57682](https://github.com/ClickHouse/ClickHouse/pull/57682) ([Azat Khuzhin](https://github.com/azat)). +* test for [#33308](https://github.com/ClickHouse/ClickHouse/issues/33308) [#57693](https://github.com/ClickHouse/ClickHouse/pull/57693) ([Denny Crane](https://github.com/den-crane)). +* support keeper memory soft limit ratio [#57699](https://github.com/ClickHouse/ClickHouse/pull/57699) ([Han Fei](https://github.com/hanfei1991)). +* Fix test_dictionaries_update_and_reload/test.py::test_reload_while_loading flakiness [#57714](https://github.com/ClickHouse/ClickHouse/pull/57714) ([Azat Khuzhin](https://github.com/azat)). 
+* Tune autoscale to scale for single job in the queue [#57742](https://github.com/ClickHouse/ClickHouse/pull/57742) ([Max K.](https://github.com/mkaynov)). +* Tune network memory for dockerhub proxy hosts [#57744](https://github.com/ClickHouse/ClickHouse/pull/57744) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Parallel replicas: announcement response handling improvement [#57749](https://github.com/ClickHouse/ClickHouse/pull/57749) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix building Rust with Musl [#57756](https://github.com/ClickHouse/ClickHouse/pull/57756) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test_parallel_replicas_distributed_read_from_all [#57757](https://github.com/ClickHouse/ClickHouse/pull/57757) ([Igor Nikonov](https://github.com/devcrafter)). +* Minor refactoring of toStartOfInterval() [#57761](https://github.com/ClickHouse/ClickHouse/pull/57761) ([Robert Schulze](https://github.com/rschu1ze)). +* Don't run test 02919_skip_lots_of_parsing_errors on aarch64 [#57762](https://github.com/ClickHouse/ClickHouse/pull/57762) ([Kruglov Pavel](https://github.com/Avogar)). +* More respect to `min_number_of_marks` in `ParallelReplicasReadingCoordinator` [#57763](https://github.com/ClickHouse/ClickHouse/pull/57763) ([Nikita Taranov](https://github.com/nickitat)). +* SerializationString reduce memory usage [#57787](https://github.com/ClickHouse/ClickHouse/pull/57787) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix ThreadSanitizer data race in librdkafka [#57791](https://github.com/ClickHouse/ClickHouse/pull/57791) ([Ilya Golshtein](https://github.com/ilejn)). +* Rename `system.async_loader` into `system.asynchronous_loader` [#57793](https://github.com/ClickHouse/ClickHouse/pull/57793) ([Sergei Trifonov](https://github.com/serxa)). +* Set replica number to its position in cluster definition [#57800](https://github.com/ClickHouse/ClickHouse/pull/57800) ([Nikita Taranov](https://github.com/nickitat)). +* fix clickhouse-client invocation in 02327_capnproto_protobuf_empty_messages [#57804](https://github.com/ClickHouse/ClickHouse/pull/57804) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Fix flaky test_parallel_replicas_over_distributed [#57809](https://github.com/ClickHouse/ClickHouse/pull/57809) ([Igor Nikonov](https://github.com/devcrafter)). +* Revert [#57741](https://github.com/ClickHouse/ClickHouse/issues/57741) [#57811](https://github.com/ClickHouse/ClickHouse/pull/57811) ([Raúl Marín](https://github.com/Algunenano)). +* Dumb down `substring()` tests [#57821](https://github.com/ClickHouse/ClickHouse/pull/57821) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v23.11.2.11-stable [#57824](https://github.com/ClickHouse/ClickHouse/pull/57824) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix 02906_force_optimize_projection_name [#57826](https://github.com/ClickHouse/ClickHouse/pull/57826) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* ClickBench: slightly better [#57831](https://github.com/ClickHouse/ClickHouse/pull/57831) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix 02932_kill_query_sleep flakiness [#57849](https://github.com/ClickHouse/ClickHouse/pull/57849) ([Azat Khuzhin](https://github.com/azat)). +* Revert "Replace --no-system-tables with loading virtual tables of system database lazily" [#57851](https://github.com/ClickHouse/ClickHouse/pull/57851) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix memory leak in StorageHDFS [#57860](https://github.com/ClickHouse/ClickHouse/pull/57860) ([Andrey Zvonov](https://github.com/zvonand)). +* Remove hardcoded clickhouse-client invocations from tests [#57861](https://github.com/ClickHouse/ClickHouse/pull/57861) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Follow up to [#57568](https://github.com/ClickHouse/ClickHouse/issues/57568) [#57863](https://github.com/ClickHouse/ClickHouse/pull/57863) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix assertion in HashJoin [#57873](https://github.com/ClickHouse/ClickHouse/pull/57873) ([vdimir](https://github.com/vdimir)). +* More efficient constructor for SerializationEnum [#57887](https://github.com/ClickHouse/ClickHouse/pull/57887) ([Duc Canh Le](https://github.com/canhld94)). +* Fix test_unset_skip_unavailable_shards [#57895](https://github.com/ClickHouse/ClickHouse/pull/57895) ([Raúl Marín](https://github.com/Algunenano)). +* Add argument to fill the gap in cherry-pick [#57896](https://github.com/ClickHouse/ClickHouse/pull/57896) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Delete debug logging in OutputFormatWithUTF8ValidationAdaptor [#57899](https://github.com/ClickHouse/ClickHouse/pull/57899) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove heavy rust stable toolchain [#57905](https://github.com/ClickHouse/ClickHouse/pull/57905) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Improvements for 00002_log_and_exception_messages_formatting [#57910](https://github.com/ClickHouse/ClickHouse/pull/57910) ([Raúl Marín](https://github.com/Algunenano)). +* Update CHANGELOG.md [#57911](https://github.com/ClickHouse/ClickHouse/pull/57911) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* remove cruft from TablesLoader [#57938](https://github.com/ClickHouse/ClickHouse/pull/57938) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix `/dashboard` work with passwords [#57948](https://github.com/ClickHouse/ClickHouse/pull/57948) ([Sergei Trifonov](https://github.com/serxa)). +* Remove wrong test [#57950](https://github.com/ClickHouse/ClickHouse/pull/57950) ([Sergei Trifonov](https://github.com/serxa)). +* Fix docker image for integration tests (fixes CI) [#57952](https://github.com/ClickHouse/ClickHouse/pull/57952) ([Azat Khuzhin](https://github.com/azat)). +* Remove C++ templates (normalizeQuery) [#57963](https://github.com/ClickHouse/ClickHouse/pull/57963) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* A small fix for dashboard [#57964](https://github.com/ClickHouse/ClickHouse/pull/57964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Always use `pread` for reading cache segments [#57970](https://github.com/ClickHouse/ClickHouse/pull/57970) ([Nikita Taranov](https://github.com/nickitat)). +* Improve some tests [#57973](https://github.com/ClickHouse/ClickHouse/pull/57973) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Revert "Merge pull request [#57907](https://github.com/ClickHouse/ClickHouse/issues/57907) from azat/system.stack_trace-rt_tgsigqueueinfo" [#57974](https://github.com/ClickHouse/ClickHouse/pull/57974) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#49708](https://github.com/ClickHouse/ClickHouse/issues/49708) [#57979](https://github.com/ClickHouse/ClickHouse/pull/57979) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix style-check checkout head-ref [#57989](https://github.com/ClickHouse/ClickHouse/pull/57989) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* refine error message [#57991](https://github.com/ClickHouse/ClickHouse/pull/57991) ([Han Fei](https://github.com/hanfei1991)). +* CI for docs only fix [#57992](https://github.com/ClickHouse/ClickHouse/pull/57992) ([Max K.](https://github.com/mkaynov)). +* Replace rust's BLAKE3 with llvm's implementation [#57994](https://github.com/ClickHouse/ClickHouse/pull/57994) ([Raúl Marín](https://github.com/Algunenano)). +* Better trivial count optimization for storage `Merge` [#57996](https://github.com/ClickHouse/ClickHouse/pull/57996) ([Anton Popov](https://github.com/CurtizJ)). +* enhanced docs for `date_trunc()` [#58000](https://github.com/ClickHouse/ClickHouse/pull/58000) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* CI: add needs_changed_files flag for pr_info [#58003](https://github.com/ClickHouse/ClickHouse/pull/58003) ([Max K.](https://github.com/mkaynov)). +* more messages in ci [#58007](https://github.com/ClickHouse/ClickHouse/pull/58007) ([Sema Checherinda](https://github.com/CheSema)). +* Test parallel replicas with force_primary_key setting [#58010](https://github.com/ClickHouse/ClickHouse/pull/58010) ([Igor Nikonov](https://github.com/devcrafter)). +* Update 00002_log_and_exception_messages_formatting.sql [#58012](https://github.com/ClickHouse/ClickHouse/pull/58012) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix rare race in external sort/aggregation with temporary data in cache [#58013](https://github.com/ClickHouse/ClickHouse/pull/58013) ([Anton Popov](https://github.com/CurtizJ)). +* Fix segfault in FuzzJSON engine [#58015](https://github.com/ClickHouse/ClickHouse/pull/58015) ([Julia Kartseva](https://github.com/jkartseva)). +* fix freebsd build [#58019](https://github.com/ClickHouse/ClickHouse/pull/58019) ([Julia Kartseva](https://github.com/jkartseva)). +* Rename canUseParallelReplicas to canUseTaskBasedParallelReplicas [#58025](https://github.com/ClickHouse/ClickHouse/pull/58025) ([Raúl Marín](https://github.com/Algunenano)). +* Remove fixed tests from analyzer_tech_debt.txt [#58028](https://github.com/ClickHouse/ClickHouse/pull/58028) ([Raúl Marín](https://github.com/Algunenano)). +* More verbose errors on 00002_log_and_exception_messages_formatting [#58037](https://github.com/ClickHouse/ClickHouse/pull/58037) ([Raúl Marín](https://github.com/Algunenano)). +* Make window insert result into constant [#58045](https://github.com/ClickHouse/ClickHouse/pull/58045) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* CI: Happy new year [#58046](https://github.com/ClickHouse/ClickHouse/pull/58046) ([Raúl Marín](https://github.com/Algunenano)). +* Follow up for [#57691](https://github.com/ClickHouse/ClickHouse/issues/57691) [#58048](https://github.com/ClickHouse/ClickHouse/pull/58048) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* always run ast_fuzz and sqllancer [#58049](https://github.com/ClickHouse/ClickHouse/pull/58049) ([Max K.](https://github.com/mkaynov)). +* Add GH status for PR formating [#58050](https://github.com/ClickHouse/ClickHouse/pull/58050) ([Max K.](https://github.com/mkaynov)). +* Small improvement for SystemLogBase [#58051](https://github.com/ClickHouse/ClickHouse/pull/58051) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Bump Azure to v1.6.0 [#58052](https://github.com/ClickHouse/ClickHouse/pull/58052) ([Robert Schulze](https://github.com/rschu1ze)). 
+* Correct values for randomization [#58058](https://github.com/ClickHouse/ClickHouse/pull/58058) ([Anton Popov](https://github.com/CurtizJ)). +* Non post request should be readonly [#58060](https://github.com/ClickHouse/ClickHouse/pull/58060) ([San](https://github.com/santrancisco)). +* Revert "Merge pull request [#55710](https://github.com/ClickHouse/ClickHouse/issues/55710) from guoxiaolongzte/clickhouse-test… [#58066](https://github.com/ClickHouse/ClickHouse/pull/58066) ([Raúl Marín](https://github.com/Algunenano)). +* fix typo in the test 02479 [#58072](https://github.com/ClickHouse/ClickHouse/pull/58072) ([Sema Checherinda](https://github.com/CheSema)). +* Bump Azure to 1.7.2 [#58075](https://github.com/ClickHouse/ClickHouse/pull/58075) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix flaky test `02567_and_consistency` [#58076](https://github.com/ClickHouse/ClickHouse/pull/58076) ([Anton Popov](https://github.com/CurtizJ)). +* Fix Tests Bugfix Validate Check [#58078](https://github.com/ClickHouse/ClickHouse/pull/58078) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix for nightly job for digest-ci [#58079](https://github.com/ClickHouse/ClickHouse/pull/58079) ([Max K.](https://github.com/mkaynov)). +* Test for parallel replicas with remote() [#58081](https://github.com/ClickHouse/ClickHouse/pull/58081) ([Igor Nikonov](https://github.com/devcrafter)). +* Minor cosmetic changes [#58092](https://github.com/ClickHouse/ClickHouse/pull/58092) ([Raúl Marín](https://github.com/Algunenano)). +* Reintroduce OPTIMIZE CLEANUP as no-op [#58100](https://github.com/ClickHouse/ClickHouse/pull/58100) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add compatibility in the replication protocol for a removed feature [#58104](https://github.com/ClickHouse/ClickHouse/pull/58104) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Flaky 02922_analyzer_aggregate_nothing_type [#58105](https://github.com/ClickHouse/ClickHouse/pull/58105) ([Raúl Marín](https://github.com/Algunenano)). +* Update version_date.tsv and changelogs after v23.11.3.23-stable [#58106](https://github.com/ClickHouse/ClickHouse/pull/58106) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Limited CI on the master for docs only change [#58121](https://github.com/ClickHouse/ClickHouse/pull/58121) ([Max K.](https://github.com/mkaynov)). +* style fix [#58125](https://github.com/ClickHouse/ClickHouse/pull/58125) ([Max K.](https://github.com/mkaynov)). +* Support "do not test" label with ci.py [#58128](https://github.com/ClickHouse/ClickHouse/pull/58128) ([Max K.](https://github.com/mkaynov)). +* Use the single images list for integration tests everywhere [#58130](https://github.com/ClickHouse/ClickHouse/pull/58130) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Disable parallel replicas with IN (subquery) [#58133](https://github.com/ClickHouse/ClickHouse/pull/58133) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix clang-tidy [#58134](https://github.com/ClickHouse/ClickHouse/pull/58134) ([Raúl Marín](https://github.com/Algunenano)). +* Run build report check job on build failures, fix [#58135](https://github.com/ClickHouse/ClickHouse/pull/58135) ([Max K.](https://github.com/mkaynov)). +* Fix dashboard legend sorting and rows number [#58151](https://github.com/ClickHouse/ClickHouse/pull/58151) ([Sergei Trifonov](https://github.com/serxa)). 
+* Remove retryStrategy assignments overwritten in ClientFactory::create() [#58163](https://github.com/ClickHouse/ClickHouse/pull/58163) ([Daniel Pozo Escalona](https://github.com/danipozo)). +* Helper improvements [#58164](https://github.com/ClickHouse/ClickHouse/pull/58164) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Pass through exceptions for reading from S3 [#58165](https://github.com/ClickHouse/ClickHouse/pull/58165) ([Azat Khuzhin](https://github.com/azat)). +* [RFC] Adjust all std::ios implementations in poco to set failbit/badbit by default [#58166](https://github.com/ClickHouse/ClickHouse/pull/58166) ([Azat Khuzhin](https://github.com/azat)). +* Add bytes_uncompressed to system.part_log [#58167](https://github.com/ClickHouse/ClickHouse/pull/58167) ([Jordi Villar](https://github.com/jrdi)). +* Update docker/test/stateful/run.sh [#58168](https://github.com/ClickHouse/ClickHouse/pull/58168) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update 00165_jit_aggregate_functions.sql [#58169](https://github.com/ClickHouse/ClickHouse/pull/58169) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update clickhouse-test [#58170](https://github.com/ClickHouse/ClickHouse/pull/58170) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Profile event 'ParallelReplicasUsedCount' [#58173](https://github.com/ClickHouse/ClickHouse/pull/58173) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix flaky test `02719_aggregate_with_empty_string_key` [#58176](https://github.com/ClickHouse/ClickHouse/pull/58176) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix [#58171](https://github.com/ClickHouse/ClickHouse/issues/58171) [#58177](https://github.com/ClickHouse/ClickHouse/pull/58177) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add base backup name to system.backups and system.backup_log tables [#58178](https://github.com/ClickHouse/ClickHouse/pull/58178) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Fix use-after-move [#58182](https://github.com/ClickHouse/ClickHouse/pull/58182) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Looking at strange code [#58196](https://github.com/ClickHouse/ClickHouse/pull/58196) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix all Exception with missing arguments [#58198](https://github.com/ClickHouse/ClickHouse/pull/58198) ([Azat Khuzhin](https://github.com/azat)). +* Fix leftover processes/hangs in tests [#58200](https://github.com/ClickHouse/ClickHouse/pull/58200) ([Azat Khuzhin](https://github.com/azat)). +* Fix DWARFBlockInputFormat failing on DWARF 5 unit address ranges [#58204](https://github.com/ClickHouse/ClickHouse/pull/58204) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix error in archive reader [#58206](https://github.com/ClickHouse/ClickHouse/pull/58206) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix DWARFBlockInputFormat using wrong base address sometimes [#58208](https://github.com/ClickHouse/ClickHouse/pull/58208) ([Michael Kolupaev](https://github.com/al13n321)). +* Add support for specifying query parameters in the command line in clickhouse-local [#58210](https://github.com/ClickHouse/ClickHouse/pull/58210) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Fix leftover processes/hangs in tests (resubmit) [#58213](https://github.com/ClickHouse/ClickHouse/pull/58213) ([Azat Khuzhin](https://github.com/azat)). 
+* Add optimization for AND notEquals chain in logical expression optimizer [#58214](https://github.com/ClickHouse/ClickHouse/pull/58214) ([Kevin Mingtarja](https://github.com/kevinmingtarja)). +* Fix syntax and doc [#58221](https://github.com/ClickHouse/ClickHouse/pull/58221) ([San](https://github.com/santrancisco)). +* Cleanup some known short messages [#58226](https://github.com/ClickHouse/ClickHouse/pull/58226) ([Raúl Marín](https://github.com/Algunenano)). +* Some code refactoring (was an attempt to improve build time, but failed) [#58237](https://github.com/ClickHouse/ClickHouse/pull/58237) ([Azat Khuzhin](https://github.com/azat)). +* Fix perf test README [#58245](https://github.com/ClickHouse/ClickHouse/pull/58245) ([Raúl Marín](https://github.com/Algunenano)). +* [Analyzer] Add test for [#57086](https://github.com/ClickHouse/ClickHouse/issues/57086) [#58249](https://github.com/ClickHouse/ClickHouse/pull/58249) ([Raúl Marín](https://github.com/Algunenano)). +* Reintroduce compatibility with `is_deleted` on a syntax level [#58251](https://github.com/ClickHouse/ClickHouse/pull/58251) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid throwing ABORTED on normal situations [#58252](https://github.com/ClickHouse/ClickHouse/pull/58252) ([Raúl Marín](https://github.com/Algunenano)). +* Remove mayBenefitFromIndexForIn [#58265](https://github.com/ClickHouse/ClickHouse/pull/58265) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Allow a few retries when committing a part during shutdown [#58269](https://github.com/ClickHouse/ClickHouse/pull/58269) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Revert [#58267](https://github.com/ClickHouse/ClickHouse/issues/58267) [#58274](https://github.com/ClickHouse/ClickHouse/pull/58274) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/en/development/build-cross-osx.md b/docs/en/development/build-cross-osx.md index a04d676e92d..eddf24448c1 100644 --- a/docs/en/development/build-cross-osx.md +++ b/docs/en/development/build-cross-osx.md @@ -28,18 +28,20 @@ sudo apt-get install clang-17 Let’s remember the path where we install `cctools` as ${CCTOOLS} ``` bash +mkdir ~/cctools export CCTOOLS=$(cd ~/cctools && pwd) -mkdir ${CCTOOLS} cd ${CCTOOLS} -git clone --depth=1 https://github.com/tpoechtrager/apple-libtapi.git +git clone https://github.com/tpoechtrager/apple-libtapi.git cd apple-libtapi +git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 INSTALLPREFIX=${CCTOOLS} ./build.sh ./install.sh cd .. -git clone --depth=1 https://github.com/tpoechtrager/cctools-port.git +git clone https://github.com/tpoechtrager/cctools-port.git cd cctools-port/cctools +git checkout 2a3e1c2a6ff54a30f898b70cfb9ba1692a55fad7 ./configure --prefix=$(readlink -f ${CCTOOLS}) --with-libtapi=$(readlink -f ${CCTOOLS}) --target=x86_64-apple-darwin make install ``` diff --git a/docs/en/development/build-cross-s390x.md b/docs/en/development/build-cross-s390x.md index 088dd6f2679..b7cda515d77 100644 --- a/docs/en/development/build-cross-s390x.md +++ b/docs/en/development/build-cross-s390x.md @@ -1,206 +1,206 @@ ---- -slug: /en/development/build-cross-s390x -sidebar_position: 69 -title: How to Build, Run and Debug ClickHouse on Linux for s390x (zLinux) -sidebar_label: Build on Linux for s390x (zLinux) ---- - -As of writing (2023/3/10) building for s390x considered to be experimental. Not all features can be enabled, has broken features and is currently under active development. 
- - -## Building - -As s390x does not support boringssl, it uses OpenSSL and has two related build options. -- By default, the s390x build will dynamically link to OpenSSL libraries. It will build OpenSSL shared objects, so it's not necessary to install OpenSSL beforehand. (This option is recommended in all cases.) -- Another option is to build OpenSSL in-tree. In this case two build flags need to be supplied to cmake -```bash --DENABLE_OPENSSL_DYNAMIC=0 -DENABLE_OPENSSL=1 -``` - -These instructions assume that the host machine is x86_64 and has all the tooling required to build natively based on the [build instructions](../development/build.md). It also assumes that the host is Ubuntu 22.04 but the following instructions should also work on Ubuntu 20.04. - -In addition to installing the tooling used to build natively, the following additional packages need to be installed: - -```bash -apt-get install binutils-s390x-linux-gnu libc6-dev-s390x-cross gcc-s390x-linux-gnu binfmt-support qemu-user-static -``` - -If you wish to cross compile rust code install the rust cross compile target for s390x: -```bash -rustup target add s390x-unknown-linux-gnu -``` - -To build for s390x: -```bash -cmake -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-s390x.cmake .. -ninja -``` - -## Running - -Once built, the binary can be run with, eg.: - -```bash -qemu-s390x-static -L /usr/s390x-linux-gnu ./clickhouse -``` - -## Debugging - -Install LLDB: - -```bash -apt-get install lldb-15 -``` - -To Debug a s390x executable, run clickhouse using QEMU in debug mode: - -```bash -qemu-s390x-static -g 31338 -L /usr/s390x-linux-gnu ./clickhouse -``` - -In another shell run LLDB and attach, replace `` and `` with the values corresponding to your environment. -```bash -lldb-15 -(lldb) target create ./clickhouse -Current executable set to '//ClickHouse//programs/clickhouse' (s390x). -(lldb) settings set target.source-map //ClickHouse -(lldb) gdb-remote 31338 -Process 1 stopped -* thread #1, stop reason = signal SIGTRAP - frame #0: 0x0000004020e74cd0 --> 0x4020e74cd0: lgr %r2, %r15 - 0x4020e74cd4: aghi %r15, -160 - 0x4020e74cd8: xc 0(8,%r15), 0(%r15) - 0x4020e74cde: brasl %r14, 275429939040 -(lldb) b main -Breakpoint 1: 9 locations. -(lldb) c -Process 1 resuming -Process 1 stopped -* thread #1, stop reason = breakpoint 1.1 - frame #0: 0x0000004005cd9fc0 clickhouse`main(argc_=1, argv_=0x0000004020e594a8) at main.cpp:450:17 - 447 #if !defined(FUZZING_MODE) - 448 int main(int argc_, char ** argv_) - 449 { --> 450 inside_main = true; - 451 SCOPE_EXIT({ inside_main = false; }); - 452 - 453 /// PHDR cache is required for query profiler to work reliably -``` - -## Visual Studio Code integration - -- [CodeLLDB](https://github.com/vadimcn/vscode-lldb) extension is required for visual debugging. -- [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [CMake Variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md). -- Make sure to set the backend to your LLVM installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"` -- Make sure to run the clickhouse executable in debug mode prior to launch. 
(It is also possible to create a `preLaunchTask` that automates this) - -### Example configurations -#### cmake-variants.yaml -```yaml -buildType: - default: relwithdebinfo - choices: - debug: - short: Debug - long: Emit debug information - buildType: Debug - release: - short: Release - long: Optimize generated code - buildType: Release - relwithdebinfo: - short: RelWithDebInfo - long: Release with Debug Info - buildType: RelWithDebInfo - tsan: - short: MinSizeRel - long: Minimum Size Release - buildType: MinSizeRel - -toolchain: - default: default - description: Select toolchain - choices: - default: - short: x86_64 - long: x86_64 - s390x: - short: s390x - long: s390x - settings: - CMAKE_TOOLCHAIN_FILE: cmake/linux/toolchain-s390x.cmake -``` - -#### launch.json -```json -{ - "version": "0.2.0", - "configurations": [ - { - "type": "lldb", - "request": "custom", - "name": "(lldb) Launch s390x with qemu", - "targetCreateCommands": ["target create ${command:cmake.launchTargetPath}"], - "processCreateCommands": ["gdb-remote 2159"], - "preLaunchTask": "Run ClickHouse" - } - ] -} -``` - -#### settings.json -This would also put different builds under different subfolders of the `build` folder. -```json -{ - "cmake.buildDirectory": "${workspaceFolder}/build/${buildKitVendor}-${buildKitVersion}-${variant:toolchain}-${variant:buildType}", - "lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so" -} -``` - -#### run-debug.sh -```sh -#! /bin/sh -echo 'Starting debugger session' -cd $1 -qemu-s390x-static -g 2159 -L /usr/s390x-linux-gnu $2 $3 $4 -``` - -#### tasks.json -Defines a task to run the compiled executable in `server` mode under a `tmp` folder next to the binaries, with configuration from under `programs/server/config.xml`. -```json -{ - "version": "2.0.0", - "tasks": [ - { - "label": "Run ClickHouse", - "type": "shell", - "isBackground": true, - "command": "${workspaceFolder}/.vscode/run-debug.sh", - "args": [ - "${command:cmake.launchTargetDirectory}/tmp", - "${command:cmake.launchTargetPath}", - "server", - "--config-file=${workspaceFolder}/programs/server/config.xml" - ], - "problemMatcher": [ - { - "pattern": [ - { - "regexp": ".", - "file": 1, - "location": 2, - "message": 3 - } - ], - "background": { - "activeOnStart": true, - "beginsPattern": "^Starting debugger session", - "endsPattern": ".*" - } - } - ] - } - ] -} -``` +--- +slug: /en/development/build-cross-s390x +sidebar_position: 69 +title: How to Build, Run and Debug ClickHouse on Linux for s390x (zLinux) +sidebar_label: Build on Linux for s390x (zLinux) +--- + +As of writing (2023/3/10) building for s390x considered to be experimental. Not all features can be enabled, has broken features and is currently under active development. + + +## Building + +As s390x does not support boringssl, it uses OpenSSL and has two related build options. +- By default, the s390x build will dynamically link to OpenSSL libraries. It will build OpenSSL shared objects, so it's not necessary to install OpenSSL beforehand. (This option is recommended in all cases.) +- Another option is to build OpenSSL in-tree. In this case two build flags need to be supplied to cmake +```bash +-DENABLE_OPENSSL_DYNAMIC=0 -DENABLE_OPENSSL=1 +``` + +These instructions assume that the host machine is x86_64 and has all the tooling required to build natively based on the [build instructions](../development/build.md). It also assumes that the host is Ubuntu 22.04 but the following instructions should also work on Ubuntu 20.04. 
+ +In addition to installing the tooling used to build natively, the following additional packages need to be installed: + +```bash +apt-get install binutils-s390x-linux-gnu libc6-dev-s390x-cross gcc-s390x-linux-gnu binfmt-support qemu-user-static +``` + +If you wish to cross compile rust code install the rust cross compile target for s390x: +```bash +rustup target add s390x-unknown-linux-gnu +``` + +To build for s390x: +```bash +cmake -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-s390x.cmake .. +ninja +``` + +## Running + +Once built, the binary can be run with, eg.: + +```bash +qemu-s390x-static -L /usr/s390x-linux-gnu ./clickhouse +``` + +## Debugging + +Install LLDB: + +```bash +apt-get install lldb-15 +``` + +To Debug a s390x executable, run clickhouse using QEMU in debug mode: + +```bash +qemu-s390x-static -g 31338 -L /usr/s390x-linux-gnu ./clickhouse +``` + +In another shell run LLDB and attach, replace `` and `` with the values corresponding to your environment. +```bash +lldb-15 +(lldb) target create ./clickhouse +Current executable set to '//ClickHouse//programs/clickhouse' (s390x). +(lldb) settings set target.source-map //ClickHouse +(lldb) gdb-remote 31338 +Process 1 stopped +* thread #1, stop reason = signal SIGTRAP + frame #0: 0x0000004020e74cd0 +-> 0x4020e74cd0: lgr %r2, %r15 + 0x4020e74cd4: aghi %r15, -160 + 0x4020e74cd8: xc 0(8,%r15), 0(%r15) + 0x4020e74cde: brasl %r14, 275429939040 +(lldb) b main +Breakpoint 1: 9 locations. +(lldb) c +Process 1 resuming +Process 1 stopped +* thread #1, stop reason = breakpoint 1.1 + frame #0: 0x0000004005cd9fc0 clickhouse`main(argc_=1, argv_=0x0000004020e594a8) at main.cpp:450:17 + 447 #if !defined(FUZZING_MODE) + 448 int main(int argc_, char ** argv_) + 449 { +-> 450 inside_main = true; + 451 SCOPE_EXIT({ inside_main = false; }); + 452 + 453 /// PHDR cache is required for query profiler to work reliably +``` + +## Visual Studio Code integration + +- [CodeLLDB](https://github.com/vadimcn/vscode-lldb) extension is required for visual debugging. +- [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [CMake Variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md). +- Make sure to set the backend to your LLVM installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"` +- Make sure to run the clickhouse executable in debug mode prior to launch. 
(It is also possible to create a `preLaunchTask` that automates this) + +### Example configurations +#### cmake-variants.yaml +```yaml +buildType: + default: relwithdebinfo + choices: + debug: + short: Debug + long: Emit debug information + buildType: Debug + release: + short: Release + long: Optimize generated code + buildType: Release + relwithdebinfo: + short: RelWithDebInfo + long: Release with Debug Info + buildType: RelWithDebInfo + tsan: + short: MinSizeRel + long: Minimum Size Release + buildType: MinSizeRel + +toolchain: + default: default + description: Select toolchain + choices: + default: + short: x86_64 + long: x86_64 + s390x: + short: s390x + long: s390x + settings: + CMAKE_TOOLCHAIN_FILE: cmake/linux/toolchain-s390x.cmake +``` + +#### launch.json +```json +{ + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "custom", + "name": "(lldb) Launch s390x with qemu", + "targetCreateCommands": ["target create ${command:cmake.launchTargetPath}"], + "processCreateCommands": ["gdb-remote 2159"], + "preLaunchTask": "Run ClickHouse" + } + ] +} +``` + +#### settings.json +This would also put different builds under different subfolders of the `build` folder. +```json +{ + "cmake.buildDirectory": "${workspaceFolder}/build/${buildKitVendor}-${buildKitVersion}-${variant:toolchain}-${variant:buildType}", + "lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so" +} +``` + +#### run-debug.sh +```sh +#! /bin/sh +echo 'Starting debugger session' +cd $1 +qemu-s390x-static -g 2159 -L /usr/s390x-linux-gnu $2 $3 $4 +``` + +#### tasks.json +Defines a task to run the compiled executable in `server` mode under a `tmp` folder next to the binaries, with configuration from under `programs/server/config.xml`. +```json +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Run ClickHouse", + "type": "shell", + "isBackground": true, + "command": "${workspaceFolder}/.vscode/run-debug.sh", + "args": [ + "${command:cmake.launchTargetDirectory}/tmp", + "${command:cmake.launchTargetPath}", + "server", + "--config-file=${workspaceFolder}/programs/server/config.xml" + ], + "problemMatcher": [ + { + "pattern": [ + { + "regexp": ".", + "file": 1, + "location": 2, + "message": 3 + } + ], + "background": { + "activeOnStart": true, + "beginsPattern": "^Starting debugger session", + "endsPattern": ".*" + } + } + ] + } + ] +} +``` diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index e65de4a37e0..39ccc9a78c3 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -3,7 +3,7 @@ slug: /en/development/build-osx sidebar_position: 65 sidebar_label: Build on macOS title: How to Build ClickHouse on macOS -description: How to build ClickHouse on macOS +description: How to build ClickHouse on macOS for macOS --- :::info You don't have to build ClickHouse yourself! diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 9d6a80de904..31346c77949 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -7,42 +7,39 @@ description: Prerequisites and an overview of how to build ClickHouse # Getting Started Guide for Building ClickHouse -The building of ClickHouse is supported on Linux, FreeBSD and macOS. +ClickHouse can be build on Linux, FreeBSD and macOS. If you use Windows, you can still build ClickHouse in a virtual machine running Linux, e.g. [VirtualBox](https://www.virtualbox.org/) with Ubuntu. 
-If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T. - -ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading. +ClickHouse requires a 64-bit system to compile and run, 32-bit systems do not work. ## Creating a Repository on GitHub {#creating-a-repository-on-github} -To start working with ClickHouse repository you will need a GitHub account. +To start developing for ClickHouse you will need a [GitHub](https://www.virtualbox.org/) account. Please also generate a SSH key locally (if you don't have one already) and upload the public key to GitHub as this is a prerequisite for contributing patches. -You probably already have one, but if you do not, please register at https://github.com. In case you do not have SSH keys, you should generate them and then upload them on GitHub. It is required for sending over your patches. It is also possible to use the same SSH keys that you use with any other SSH servers - probably you already have those. +Next, create a fork of the [ClickHouse repository](https://github.com/ClickHouse/ClickHouse/) in your personal account by clicking the "fork" button in the upper right corner. -Create a fork of ClickHouse repository. To do that please click on the “fork” button in the upper right corner at https://github.com/ClickHouse/ClickHouse. It will fork your own copy of ClickHouse/ClickHouse to your account. +To contribute, e.g. a fix for an issue or a feature, please commit your changes to a branch in your fork, then create a "pull request" with the changes to the main repository. -The development process consists of first committing the intended changes into your fork of ClickHouse and then creating a “pull request” for these changes to be accepted into the main repository (ClickHouse/ClickHouse). +For working with Git repositories, please install `git`. In Ubuntu run these commands in a terminal: -To work with Git repositories, please install `git`. To do that in Ubuntu you would run in the command line terminal: +```sh +sudo apt update +sudo apt install git +``` - sudo apt update - sudo apt install git - -A brief manual on using Git can be found [here](https://education.github.com/git-cheat-sheet-education.pdf). -For a detailed manual on Git see [here](https://git-scm.com/book/en/v2). +A cheatsheet for using Git can be found [here](https://education.github.com/git-cheat-sheet-education.pdf). The detailed manual for Git is [here](https://git-scm.com/book/en/v2). ## Cloning a Repository to Your Development Machine {#cloning-a-repository-to-your-development-machine} -Next, you need to download the source files onto your working machine. This is called “to clone a repository” because it creates a local copy of the repository on your working machine. +First, download the source files to your working machine, i.e. 
clone the repository: -Run in your terminal: +```sh +git clone git@github.com:your_github_username/ClickHouse.git # replace placeholder with your GitHub user name +cd ClickHouse +``` - git clone git@github.com:your_github_username/ClickHouse.git # replace placeholder with your GitHub user name - cd ClickHouse +This command creates a directory `ClickHouse/` containing the source code of ClickHouse. If you specify a custom checkout directory after the URL but it is important that this path does not contain whitespaces as it may lead to problems with the build later on. -This command will create a directory `ClickHouse/` containing the source code of ClickHouse. If you specify a custom checkout directory (after the URL), it is important that this path does not contain whitespaces as it may lead to problems with the build system. - -To make library dependencies available for the build, the ClickHouse repository uses Git submodules, i.e. references to external repositories. These are not checked out by default. To do so, you can either +The ClickHouse repository uses Git submodules, i.e. references to external repositories (usually 3rd party libraries used by ClickHouse). These are not checked out by default. To do so, you can either - run `git clone` with option `--recurse-submodules`, @@ -52,7 +49,7 @@ To make library dependencies available for the build, the ClickHouse repository You can check the Git status with the command: `git submodule status`. -If you get the following error message: +If you get the following error message Permission denied (publickey). fatal: Could not read from remote repository. @@ -60,7 +57,7 @@ If you get the following error message: Please make sure you have the correct access rights and the repository exists. -It generally means that the SSH keys for connecting to GitHub are missing. These keys are normally located in `~/.ssh`. For SSH keys to be accepted you need to upload them in the settings section of GitHub UI. +it generally means that the SSH keys for connecting to GitHub are missing. These keys are normally located in `~/.ssh`. For SSH keys to be accepted you need to upload them in GitHub's settings. You can also clone the repository via https protocol: @@ -74,12 +71,17 @@ You can also add original ClickHouse repo address to your local repository to pu After successfully running this command you will be able to pull updates from the main ClickHouse repo by running `git pull upstream master`. +:::note +Instructions below assume you are building on Linux. If you are cross-compiling or building on macOS, please also check for operating system and architecture specific guides, such as building [on macOS for macOS](build-osx.md), [on Linux for macOS](build-cross-osx.md), [on Linux for Linux/RISC-V](build-cross-riscv.md) and so on. +::: + ## Build System {#build-system} ClickHouse uses CMake and Ninja for building. -CMake - a meta-build system that can generate Ninja files (build tasks). -Ninja - a smaller build system with a focus on the speed used to execute those cmake generated tasks. +- CMake - a meta-build system that can generate Ninja files (build tasks). + +- Ninja - a smaller build system with a focus on the speed used to execute those cmake generated tasks. To install on Ubuntu, Debian or Mint run `sudo apt install cmake ninja-build`. 
diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 0b71a669638..77a550f2a0e 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -489,7 +489,7 @@ When using functions with response codes or `errno`, always check the result and ``` cpp if (0 != close(fd)) - throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE); + throw ErrnoException(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file {}", file_name); ``` You can use assert to check invariant in code. diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md index b024820024a..5e81eacc937 100644 --- a/docs/en/engines/table-engines/index.md +++ b/docs/en/engines/table-engines/index.md @@ -67,7 +67,6 @@ Engines in the family: Engines in the family: - [Distributed](../../engines/table-engines/special/distributed.md#distributed) -- [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview) - [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary) - [Merge](../../engines/table-engines/special/merge.md#merge) - [File](../../engines/table-engines/special/file.md#file) diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md index 9af857b0835..44febe78c77 100644 --- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md @@ -212,5 +212,5 @@ ORDER BY key ASC ``` ### More information on Joins -- [`join_algorithm` setting](/docs/en/operations/settings/settings.md#settings-join_algorithm) +- [`join_algorithm` setting](/docs/en/operations/settings/settings.md#join_algorithm) - [JOIN clause](/docs/en/sql-reference/statements/select/join.md) diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 19221c256f9..96e6bab6997 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -236,7 +236,7 @@ libhdfs3 support HDFS namenode HA. ## Storage Settings {#storage-settings} -- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. +- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. - [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default. diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index de1a090d491..141d87fed20 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -54,7 +54,7 @@ Optional parameters: - `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. - `kafka_num_consumers` — The number of consumers per table. Specify more consumers if the throughput of one consumer is insufficient. 
The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition, and must not be greater than the number of physical cores on the server where ClickHouse is deployed. Default: `1`. -- `kafka_max_block_size` — The maximum batch size (in messages) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `kafka_max_block_size` — The maximum batch size (in messages) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). - `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). Default: `0`. - `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block. Default: `0`. - `kafka_client_id` — Client identifier. Empty by default. @@ -151,7 +151,7 @@ Example: SELECT level, sum(total) FROM daily GROUP BY level; ``` -To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/settings/settings.md#settings-max_insert_block_size). If the block wasn’t formed within [stream_flush_interval_ms](../../../operations/settings/settings.md/#stream-flush-interval-ms) milliseconds, the data will be flushed to the table regardless of the completeness of the block. +To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). If the block wasn’t formed within [stream_flush_interval_ms](../../../operations/settings/settings.md/#stream-flush-interval-ms) milliseconds, the data will be flushed to the table regardless of the completeness of the block. To stop receiving topic data or to change the conversion logic, detach the materialized view: diff --git a/docs/en/engines/table-engines/integrations/nats.md b/docs/en/engines/table-engines/integrations/nats.md index 37a41159fab..e898d1f1b82 100644 --- a/docs/en/engines/table-engines/integrations/nats.md +++ b/docs/en/engines/table-engines/integrations/nats.md @@ -58,7 +58,7 @@ Optional parameters: - `nats_reconnect_wait` – Amount of time in milliseconds to sleep between each reconnect attempt. Default: `5000`. - `nats_server_list` - Server list for connection. Can be specified to connect to NATS cluster. - `nats_skip_broken_messages` - NATS message parser tolerance to schema-incompatible messages per block. Default: `0`. If `nats_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data). -- `nats_max_block_size` - Number of row collected by poll(s) for flushing data from NATS. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `nats_max_block_size` - Number of row collected by poll(s) for flushing data from NATS. Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). - `nats_flush_interval_ms` - Timeout for flushing data read from NATS. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms). - `nats_username` - NATS username. - `nats_password` - NATS password. 
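For orientation, here is a minimal sketch of how the streaming parameters above are typically wired together. It assumes a NATS server at `localhost:4222` and a subject named `events`; those names, the table names, and the chosen batch values are placeholders rather than values taken from this page.

```sql
-- Hypothetical example: consume JSONEachRow messages from a NATS subject.
CREATE TABLE nats_queue
(
    level String,
    total UInt64
)
ENGINE = NATS
SETTINGS nats_url = 'localhost:4222',
         nats_subjects = 'events',
         nats_format = 'JSONEachRow',
         nats_max_block_size = 65536,    -- rows collected per poll before flushing
         nats_flush_interval_ms = 7500;  -- flush timeout for data read from NATS

-- As with Kafka, data is usually routed into a normal table via a materialized view.
CREATE TABLE daily (level String, total UInt64) ENGINE = MergeTree ORDER BY level;

CREATE MATERIALIZED VIEW consumer TO daily
AS SELECT level, total FROM nats_queue;
```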
diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index 53c6e089a70..0f3fef3d6fb 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -65,7 +65,7 @@ Optional parameters: - `rabbitmq_deadletter_exchange` - Specify name for a [dead letter exchange](https://www.rabbitmq.com/dlx.html). You can create another table with this exchange name and collect messages in cases when they are republished to dead letter exchange. By default dead letter exchange is not specified. - `rabbitmq_persistent` - If set to 1 (true), in insert query delivery mode will be set to 2 (marks messages as 'persistent'). Default: `0`. - `rabbitmq_skip_broken_messages` – RabbitMQ message parser tolerance to schema-incompatible messages per block. If `rabbitmq_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data). Default: `0`. -- `rabbitmq_max_block_size` - Number of row collected before flushing data from RabbitMQ. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `rabbitmq_max_block_size` - Number of row collected before flushing data from RabbitMQ. Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). - `rabbitmq_flush_interval_ms` - Timeout for flushing data from RabbitMQ. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms). - `rabbitmq_queue_settings_list` - allows to set RabbitMQ settings when creating a queue. Available settings: `x-max-length`, `x-max-length-bytes`, `x-message-ttl`, `x-expires`, `x-priority`, `x-max-priority`, `x-overflow`, `x-dead-letter-exchange`, `x-queue-type`. The `durable` setting is enabled automatically for the queue. - `rabbitmq_address` - Address for connection. Use ether this setting or `rabbitmq_host_port`. diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 3144bdd32fa..dfa06801d04 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -222,7 +222,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ## Storage Settings {#storage-settings} -- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. +- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. - [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default. 
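As a quick illustration of the storage settings listed above, the sketch below creates an S3-backed table and then overwrites its object on a repeated insert via `s3_truncate_on_insert`. The bucket URL is a placeholder and credentials are omitted, so this is not a runnable endpoint as written.

```sql
-- Hypothetical bucket path; substitute a real URL and, if needed, credentials.
CREATE TABLE s3_engine_table (name String, value UInt32)
    ENGINE = S3('https://my-bucket.s3.amazonaws.com/data/file.csv', 'CSV');

-- Without this setting, inserting into an object that already exists fails;
-- with it, the existing file is truncated first.
SET s3_truncate_on_insert = 1;
INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2);

SELECT * FROM s3_engine_table;
```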
diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index 2a9b25e0c1a..8b7f86cce5c 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -4,16 +4,9 @@ sidebar_position: 181 sidebar_label: S3Queue --- -# [experimental] S3Queue Table Engine +# S3Queue Table Engine This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem and allows streaming import. This engine is similar to the [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) engines, but provides S3-specific features. -:::note -This table engine is experimental. To use it, set `allow_experimental_s3queue` to 1 by using the `SET` command: -```sql -SET allow_experimental_s3queue=1 -``` -::: - ## Create Table {#creating-a-table} ``` sql diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index 97d37e476ae..23d98d4b20e 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -12,7 +12,7 @@ In most cases you do not need a partition key, and in most other cases you do no You should never use too granular of partitioning. Don't partition your data by client identifiers or names. Instead, make a client identifier or name the first column in the ORDER BY expression. ::: -Partitioning is available for the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). [Materialized views](../../../engines/table-engines/special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well. +Partitioning is available for the [MergeTree family tables](../../../engines/table-engines/mergetree-family/mergetree.md), including [replicated tables](../../../engines/table-engines/mergetree-family/replication.md) and [materialized views](../../../sql-reference/statements/create/view.md#materialized-view). A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible. Partitions improve performance for queries containing a partitioning key because ClickHouse will filter for that partition before selecting the parts and granules within the partition. diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 89b002da192..ed413959ca6 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -520,7 +520,7 @@ Indexes of type `set` can be utilized by all functions. 
The other index types ar | [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | | [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | | [has](/docs/en/sql-reference/functions/array-functions#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ | ✔ | -| [hasAny](/docs/en/sql-reference/functions/array-functions#function-hasAny) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ | +| [hasAny](/docs/en/sql-reference/functions/array-functions#function-hasAny) | ✗ | ✗ | ✔ | ✔ | ✔ | ✗ | | [hasAll](/docs/en/sql-reference/functions/array-functions#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ | | hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ | | hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ | @@ -865,10 +865,10 @@ Tags: - `disk` — a disk within a volume. - `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the a size of a merged part estimated to be bigger than `max_data_part_size_bytes` then this part will be written to a next volume. Basically this feature allows to keep new/small parts on a hot (SSD) volume and move them to a cold (HDD) volume when they reach large size. Do not use this setting if your policy has only one volume. - `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved. -- `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. - `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default (if enabled) if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3). If disabled then already expired data part is written into a default volume and then right after moved to TTL volume. - `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`. - `least_used_ttl_ms` - Configure timeout (in milliseconds) for the updating available space on all disks (`0` - update always, `-1` - never update, default is `60000`). Note, if the disk can be used by ClickHouse only and is not subject to a online filesystem resize/shrink you can use `-1`, in all other cases it is not recommended, since eventually it will lead to incorrect space distribution. +- `prefer_not_to_merge` — You should not use this setting. Disables merging of data parts on this volume (this is harmful and leads to performance degradation). When this setting is enabled (don't do it), merging data on this volume is not allowed (which is bad). This allows (but you don't need it) controlling (if you want to control something, you're making a mistake) how ClickHouse works with slow disks (but ClickHouse knows better, so please don't use this setting). 
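To tie these tags back to tables: after a policy built from such volumes has been declared in the server configuration (see the configuration examples that follow), it is referenced from a table through the `storage_policy` MergeTree setting, and parts can additionally be relocated by hand. The policy and volume names below are hypothetical.

```sql
-- Assumes a policy named 'hot_to_cold' containing a volume named 'external'
-- has been declared in the storage configuration; both names are illustrative.
CREATE TABLE events
(
    event_date Date,
    event String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(event_date)
ORDER BY event_date
SETTINGS storage_policy = 'hot_to_cold';

-- Manually move one partition to the slower volume, independent of move_factor.
ALTER TABLE events MOVE PARTITION 202311 TO VOLUME 'external';
```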
Configuration examples: @@ -905,7 +905,6 @@ Configuration examples: external - true diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 14431c4c43b..de8ae0357dc 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -1,13 +1,16 @@ --- -slug: /en/engines/table-engines/special/distributed +sidebar_label: "Distributed" sidebar_position: 10 -sidebar_label: Distributed +slug: /en/engines/table-engines/special/distributed --- # Distributed Table Engine -Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers. -Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any. +:::warning +To create a distributed table engine in the cloud, you can use the [remote and remoteSecure](../../../sql-reference/table-functions/remote) table functions. The `Distributed(...)` syntax cannot be used in ClickHouse Cloud. +::: + +Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers. Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any. ## Creating a Table {#distributed-creating-a-table} @@ -22,6 +25,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ``` ### From a Table {#distributed-from-a-table} + When the `Distributed` table is pointing to a table on the current server you can adopt that table's schema: ``` sql @@ -48,7 +52,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2 Specifying the `sharding_key` is necessary for the following: -- For `INSERTs` into a distributed table (as the table engine needs the `sharding_key` to determine how to split the data). However, if `insert_distributed_one_random_shard` setting is enabled, then `INSERTs` do not need the sharding key +- For `INSERTs` into a distributed table (as the table engine needs the `sharding_key` to determine how to split the data). However, if `insert_distributed_one_random_shard` setting is enabled, then `INSERTs` do not need the sharding key. - For use with `optimize_skip_unused_shards` as the `sharding_key` is necessary to determine what shards should be queried #### policy_name @@ -108,7 +112,7 @@ Specifying the `sharding_key` is necessary for the following: For **Insert limit settings** (`..._insert`) see also: - [distributed_foreground_insert](../../../operations/settings/settings.md#distributed_foreground_insert) setting -- [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) setting +- [prefer_localhost_replica](../../../operations/settings/settings.md#prefer-localhost-replica) setting - `bytes_to_throw_insert` handled before `bytes_to_delay_insert`, so you should not set it to the value less then `bytes_to_delay_insert` ::: @@ -122,9 +126,7 @@ SETTINGS fsync_directories=0; ``` -Data will be read from all servers in the `logs` cluster, from the `default.hits` table located on every server in the cluster. -Data is not only read but is partially processed on the remote servers (to the extent that this is possible). -For example, for a query with `GROUP BY`, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated. 
+Data will be read from all servers in the `logs` cluster, from the `default.hits` table located on every server in the cluster. Data is not only read but is partially processed on the remote servers (to the extent that this is possible). For example, for a query with `GROUP BY`, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated. Instead of the database name, you can use a constant expression that returns a string. For example: `currentDatabase()`. @@ -183,9 +185,7 @@ Clusters are configured in the [server configuration file](../../../operations/c ``` -Here a cluster is defined with the name `logs` that consists of two shards, each of which contains two replicas. -Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards). -Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas). +Here a cluster is defined with the name `logs` that consists of two shards, each of which contains two replicas. Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards). Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas). Cluster names must not contain dots. @@ -198,9 +198,7 @@ The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `com - `secure` - Whether to use a secure SSL/TLS connection. Usually also requires specifying the port (the default secure port is `9440`). The server should listen on `9440` and be configured with correct certificates. - `compression` - Use data compression. Default value: `true`. -When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting. -If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times. -This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly. +When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load_balancing](../../../operations/settings/settings.md#load_balancing) setting. If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times. This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly. You can specify just one of the shards (in this case, query processing should be called remote, rather than distributed) or up to any number of shards. 
In each shard, you can specify from one to any number of replicas. You can specify a different number of replicas for each shard. @@ -245,7 +243,7 @@ If the server ceased to exist or had a rough restart (for example, due to a hard When querying a `Distributed` table, `SELECT` queries are sent to all shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you do not have to transfer old data into it. Instead, you can write new data to it by using a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently. -When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas). +When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#max_parallel_replicas). To learn more about how distributed `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation. diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index 6e3897398a5..fdf5242ba3b 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -101,8 +101,8 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da ## Settings {#settings} -- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. +- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. - [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. - [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local. +- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local. 
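A short sketch of how the settings above behave with the `File` engine; the table name and the chosen format are arbitrary illustrative choices, not values mandated by the engine.

```sql
-- The data is kept as a single file (data.TabSeparated) inside the table's directory.
CREATE TABLE file_engine_table (name String, value UInt32)
    ENGINE = File(TabSeparated);

-- By default, selecting from a table whose file does not exist yet throws an error;
-- this setting makes such a SELECT return an empty result instead.
SET engine_file_empty_if_not_exists = 1;
SELECT * FROM file_engine_table;

-- Truncate the existing file before the insert (disabled by default).
SET engine_file_truncate_on_insert = 1;
INSERT INTO file_engine_table VALUES ('one', 1), ('two', 2);
```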
diff --git a/docs/en/engines/table-engines/special/filelog.md b/docs/en/engines/table-engines/special/filelog.md index eef9a17444e..82201053bc5 100644 --- a/docs/en/engines/table-engines/special/filelog.md +++ b/docs/en/engines/table-engines/special/filelog.md @@ -41,7 +41,7 @@ Optional parameters: - `poll_timeout_ms` - Timeout for single poll from log file. Default: [stream_poll_timeout_ms](../../../operations/settings/settings.md#stream_poll_timeout_ms). - `poll_max_batch_size` — Maximum amount of records to be polled in a single poll. Default: [max_block_size](../../../operations/settings/settings.md#setting-max_block_size). -- `max_block_size` — The maximum batch size (in records) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `max_block_size` — The maximum batch size (in records) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). - `max_threads` - Number of max threads to parse files, default is 0, which means the number will be max(1, physical_cpu_cores / 4). - `poll_directory_watch_events_backoff_init` - The initial sleep value for watch directory thread. Default: `500`. - `poll_directory_watch_events_backoff_max` - The max sleep value for watch directory thread. Default: `32000`. diff --git a/docs/en/engines/table-engines/special/materializedview.md b/docs/en/engines/table-engines/special/materializedview.md deleted file mode 100644 index d5f3b364d4e..00000000000 --- a/docs/en/engines/table-engines/special/materializedview.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -slug: /en/engines/table-engines/special/materializedview -sidebar_position: 100 -sidebar_label: MaterializedView ---- - -# MaterializedView Table Engine - -Used for implementing materialized views (for more information, see [CREATE VIEW](../../../sql-reference/statements/create/view.md#materialized)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine. diff --git a/docs/en/getting-started/example-datasets/youtube-dislikes.md b/docs/en/getting-started/example-datasets/youtube-dislikes.md index e24c6e5a6dc..a6bbb20cc8d 100644 --- a/docs/en/getting-started/example-datasets/youtube-dislikes.md +++ b/docs/en/getting-started/example-datasets/youtube-dislikes.md @@ -25,8 +25,7 @@ The steps below will easily work on a local install of ClickHouse too. The only 1. Let's see what the data looks like. 
The `s3cluster` table function returns a table, so we can `DESCRIBE` the result: ```sql -DESCRIBE s3Cluster( - 'default', +DESCRIBE s3( 'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst', 'JSONLines' ); @@ -35,29 +34,29 @@ DESCRIBE s3Cluster( ClickHouse infers the following schema from the JSON file: ```response -┌─name────────────────┬─type─────────────────────────────────┐ -│ id │ Nullable(String) │ -│ fetch_date │ Nullable(Int64) │ -│ upload_date │ Nullable(String) │ -│ title │ Nullable(String) │ -│ uploader_id │ Nullable(String) │ -│ uploader │ Nullable(String) │ -│ uploader_sub_count │ Nullable(Int64) │ -│ is_age_limit │ Nullable(Bool) │ -│ view_count │ Nullable(Int64) │ -│ like_count │ Nullable(Int64) │ -│ dislike_count │ Nullable(Int64) │ -│ is_crawlable │ Nullable(Bool) │ -│ is_live_content │ Nullable(Bool) │ -│ has_subtitles │ Nullable(Bool) │ -│ is_ads_enabled │ Nullable(Bool) │ -│ is_comments_enabled │ Nullable(Bool) │ -│ description │ Nullable(String) │ -│ rich_metadata │ Array(Map(String, Nullable(String))) │ -│ super_titles │ Array(Map(String, Nullable(String))) │ -│ uploader_badges │ Nullable(String) │ -│ video_badges │ Nullable(String) │ -└─────────────────────┴──────────────────────────────────────┘ +┌─name────────────────┬─type───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ id │ Nullable(String) │ │ │ │ │ │ +│ fetch_date │ Nullable(String) │ │ │ │ │ │ +│ upload_date │ Nullable(String) │ │ │ │ │ │ +│ title │ Nullable(String) │ │ │ │ │ │ +│ uploader_id │ Nullable(String) │ │ │ │ │ │ +│ uploader │ Nullable(String) │ │ │ │ │ │ +│ uploader_sub_count │ Nullable(Int64) │ │ │ │ │ │ +│ is_age_limit │ Nullable(Bool) │ │ │ │ │ │ +│ view_count │ Nullable(Int64) │ │ │ │ │ │ +│ like_count │ Nullable(Int64) │ │ │ │ │ │ +│ dislike_count │ Nullable(Int64) │ │ │ │ │ │ +│ is_crawlable │ Nullable(Bool) │ │ │ │ │ │ +│ is_live_content │ Nullable(Bool) │ │ │ │ │ │ +│ has_subtitles │ Nullable(Bool) │ │ │ │ │ │ +│ is_ads_enabled │ Nullable(Bool) │ │ │ │ │ │ +│ is_comments_enabled │ Nullable(Bool) │ │ │ │ │ │ +│ description │ Nullable(String) │ │ │ │ │ │ +│ rich_metadata │ Array(Tuple(call Nullable(String), content Nullable(String), subtitle Nullable(String), title Nullable(String), url Nullable(String))) │ │ │ │ │ │ +│ super_titles │ Array(Tuple(text Nullable(String), url Nullable(String))) │ │ │ │ │ │ +│ uploader_badges │ Nullable(String) │ │ │ │ │ │ +│ video_badges │ Nullable(String) │ │ │ │ │ │ +└─────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` 2. Based on the inferred schema, we cleaned up the data types and added a primary key. Define the following table: @@ -82,13 +81,13 @@ CREATE TABLE youtube `is_ads_enabled` Bool, `is_comments_enabled` Bool, `description` String, - `rich_metadata` Array(Map(String, String)), - `super_titles` Array(Map(String, String)), + `rich_metadata` Array(Tuple(call String, content String, subtitle String, title String, url String)), + `super_titles` Array(Tuple(text String, url String)), `uploader_badges` String, `video_badges` String ) ENGINE = MergeTree -ORDER BY (uploader, upload_date); +ORDER BY (uploader, upload_date) ``` 3. 
The following command streams the records from the S3 files into the `youtube` table. diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 2ea900ba753..e8662ec16fa 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -81,7 +81,7 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun sudo apt-get install -y apt-transport-https ca-certificates dirmngr GNUPGHOME=$(mktemp -d) sudo GNUPGHOME="$GNUPGHOME" gpg --no-default-keyring --keyring /usr/share/keyrings/clickhouse-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754 -sudo rm -r "$GNUPGHOME" +sudo rm -rf "$GNUPGHOME" sudo chmod +r /usr/share/keyrings/clickhouse-keyring.gpg echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 57de0555bf6..836b1f2f637 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -478,6 +478,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. - [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`. - [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`. +- [input_format_csv_try_infer_numbers_from_strings](/docs/en/operations/settings/settings-formats.md/#input_format_csv_try_infer_numbers_from_strings) - Try to infer numbers from string fields while schema inference. Default value - `false`. ## CSVWithNames {#csvwithnames} diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 63f75fb7830..4eeb19cefcf 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -167,7 +167,7 @@ For successful requests that do not return a data table, an empty response body You can use compression to reduce network traffic when transmitting a large amount of data or for creating dumps that are immediately compressed. -You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you need `clickhouse-compressor` program to work with it. It is installed with the `clickhouse-client` package. To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting. +You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you need `clickhouse-compressor` program to work with it. It is installed with the `clickhouse-client` package. 
To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#http_native_compression_disable_checksumming_on_decompress) setting. If you specify `compress=1` in the URL, the server will compress the data it sends to you. If you specify `decompress=1` in the URL, the server will decompress the data which you pass in the `POST` method. @@ -183,7 +183,7 @@ You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP - `snappy` To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. -In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting for all compression methods. +In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#http_zlib_compression_level) setting for all compression methods. :::info Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly. @@ -285,7 +285,7 @@ For information about other parameters, see the section “SET”. Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you need to add the `session_id` GET parameter to the request. You can use any string as the session ID. By default, the session is terminated after 60 seconds of inactivity. To change this timeout, modify the `default_session_timeout` setting in the server configuration, or add the `session_timeout` GET parameter to the request. To check the session status, use the `session_check=1` parameter. Only one query at a time can be executed within a single session. -You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence: +You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#send_progress_in_http_headers). Example of the header sequence: ``` text X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334"} @@ -496,7 +496,7 @@ Next are the configuration methods for different `type`. `query` value is a predefined query of `predefined_query_handler`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a must configuration. -The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully. 
+The following example defines the values of [max_threads](../operations/settings/settings.md#max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully. :::note To keep the default `handlers` such as` query`, `play`,` ping`, add the `` rule. @@ -539,7 +539,7 @@ In `dynamic_query_handler`, the query is written in the form of parameter of the ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the parameter is not passed in. -To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` and `queries` whether the settings were set successfully. +To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#max_threads) and `max_final_threads` and `queries` whether the settings were set successfully. Example: diff --git a/docs/en/interfaces/overview.md b/docs/en/interfaces/overview.md index e60aff927c4..0e09ab6a0b7 100644 --- a/docs/en/interfaces/overview.md +++ b/docs/en/interfaces/overview.md @@ -25,6 +25,7 @@ ClickHouse server provides embedded visual interfaces for power users: - Play UI: open `/play` in the browser; - Advanced Dashboard: open `/dashboard` in the browser; +- Binary symbols viewer for ClickHouse engineers: open `/binary` in the browser; There are also a wide range of third-party libraries for working with ClickHouse: diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 0aadb09730a..ef858796936 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -834,6 +834,27 @@ $$) └──────────────┴───────────────┘ ``` +#### CSV settings {#csv-settings} + +##### input_format_csv_try_infer_numbers_from_strings + +Enabling this setting allows inferring numbers from string values. + +This setting is disabled by default. + +**Example:** + +```sql +SET input_format_json_try_infer_numbers_from_strings = 1; +DESC format(CSV, '"42","42.42"'); +``` +```reponse +┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Int64) │ │ │ │ │ │ +│ c2 │ Nullable(Float64) │ │ │ │ │ │ +└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + ### TSV/TSKV {#tsv-tskv} In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using @@ -1846,3 +1867,102 @@ DESC format(JSONAsString, '{"x" : 42, "y" : "Hello, World!"}') SETTINGS allow_ex │ json │ Object('json') │ │ │ │ │ │ └──────┴────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` + +## Schema inference modes {#schema-inference-modes} + +Schema inference from the set of data files can work in 2 different modes: `default` and `union`. +The mode is controlled by the setting `schema_inference_mode`. + +### Default mode {#default-schema-inference-mode} + +In default mode, ClickHouse assumes that all files have the same schema and tries to infer the schema by reading files one by one until it succeeds. 
+ +Example: + +Let's say we have 3 files `data1.jsonl`, `data2.jsonl` and `data3.jsonl` with the next content: + +`data1.jsonl`: +```json +{"field1" : 1, "field2" : null} +{"field1" : 2, "field2" : null} +{"field1" : 3, "field2" : null} +``` + +`data2.jsonl`: +```json +{"field1" : 4, "field2" : "Data4"} +{"field1" : 5, "field2" : "Data5"} +{"field1" : 6, "field2" : "Data5"} +``` + +`data3.jsonl`: +```json +{"field1" : 7, "field2" : "Data7", "field3" : [1, 2, 3]} +{"field1" : 8, "field2" : "Data8", "field3" : [4, 5, 6]} +{"field1" : 9, "field2" : "Data9", "field3" : [7, 8, 9]} +``` + +Let's try to use schema inference on these 3 files: +```sql +:) DESCRIBE file('data{1,2,3}.jsonl') SETTINGS schema_inference_mode='default' +``` + +Result: +```text +┌─name───┬─type─────────────┐ +│ field1 │ Nullable(Int64) │ +│ field2 │ Nullable(String) │ +└────────┴──────────────────┘ +``` + +As we can see, we don't have `field3` from file `data3.jsonl`. +It happens because ClickHouse first tried to infer schema from file `data1.jsonl`, failed because of only nulls for field `field2`, +and then tried to infer schema from `data2.jsonl` and succeeded, so data from file `data3.jsonl` wasn't read. + +### Union mode {#default-schema-inference-mode} + +In union mode, ClickHouse assumes that files can have different schemas, so it infer schemas of all files and then union them to the common schema. + +Let's say we have 3 files `data1.jsonl`, `data2.jsonl` and `data3.jsonl` with the next content: + +`data1.jsonl`: +```json +{"field1" : 1} +{"field1" : 2} +{"field1" : 3} +``` + +`data2.jsonl`: +```json +{"field2" : "Data4"} +{"field2" : "Data5"} +{"field2" : "Data5"} +``` + +`data3.jsonl`: +```json +{"field3" : [1, 2, 3]} +{"field3" : [4, 5, 6]} +{"field3" : [7, 8, 9]} +``` + +Let's try to use schema inference on these 3 files: +```sql +:) DESCRIBE file('data{1,2,3}.jsonl') SETTINGS schema_inference_mode='union' +``` + +Result: +```text +┌─name───┬─type───────────────────┐ +│ field1 │ Nullable(Int64) │ +│ field2 │ Nullable(String) │ +│ field3 │ Array(Nullable(Int64)) │ +└────────┴────────────────────────┘ +``` + +As we can see, we have all fields from all files. + +Note: +- As some of the files may not contain some columns from the resulting schema, union mode is supported only for formats that support reading subset of columns (like JSONEachRow, Parquet, TSVWithNames, etc) and won't work for other formats (like CSV, TSV, JSONCompactEachRow, etc). +- If ClickHouse cannot infer the schema from one of the files, the exception will be thrown. +- If you have a lot of files, reading schema from all of them can take a lot of time. diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 6068b185ede..d45885ee816 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -406,7 +406,7 @@ RESTORE TABLE data AS data_restored FROM Disk('s3_plain', 'cloud_backup'); :::note But keep in mind that: - This disk should not be used for `MergeTree` itself, only for `BACKUP`/`RESTORE` -- If your tables are backed by S3 storage, it doesn't use `CopyObject` calls to copy parts to the destination bucket, instead, it downloads and uploads them, which is very inefficient. Prefer to use `BACKUP ... TO S3()` syntax for this use-case. +- If your tables are backed by S3 storage and types of the disks are different, it doesn't use `CopyObject` calls to copy parts to the destination bucket, instead, it downloads and uploads them, which is very inefficient. Prefer to use `BACKUP ... 
TO S3()` syntax for this use-case. ::: ## Alternatives diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index ebf981690a9..de61da6f5c4 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -15,6 +15,27 @@ You can monitor: - Utilization of hardware resources. - ClickHouse server metrics. +## Built-in observability dashboard + +Screenshot 2023-11-12 at 6 08 58 PM + +ClickHouse comes with a built-in observability dashboard feature which can be accessed by `$HOST:$PORT/dashboard` (requires user and password) that shows the following metrics: +- Queries/second +- CPU usage (cores) +- Queries running +- Merges running +- Selected bytes/second +- IO wait +- CPU wait +- OS CPU Usage (userspace) +- OS CPU Usage (kernel) +- Read from disk +- Read from filesystem +- Memory (tracked) +- Inserted rows/second +- Total MergeTree parts +- Max parts for partition + ## Resource Utilization {#resource-utilization} ClickHouse also monitors the state of hardware resources by itself such as: @@ -43,4 +64,4 @@ You can configure ClickHouse to export metrics to [Prometheus](https://prometheu Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/ping`. If the server is available, it responds with `200 OK`. -To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap. +To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](../operations/settings/settings.md#max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap. diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md index 206f710734e..194d2714422 100644 --- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md @@ -42,7 +42,7 @@ To analyze the `trace_log` system table: - Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages). -- Allow introspection functions by the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting. +- Allow introspection functions by the [allow_introspection_functions](../../operations/settings/settings.md#allow_introspection_functions) setting. For security reasons, introspection functions are disabled by default. diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index def0f48b968..50c5ff4457f 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -29,6 +29,10 @@ Transactionally inconsistent caching is traditionally provided by client tools o the same caching logic and configuration is often duplicated. 
With ClickHouse's query cache, the caching logic moves to the server side. This reduces maintenance effort and avoids redundancy. +:::note +Security consideration: The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed. This means that if there are any alterations to the user's role or permissions between the time the query is cached and when the cache is accessed, the result will not reflect these changes. We recommend using different users to distinguish between different levels of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results. +::: + ## Configuration Settings and Usage Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the @@ -99,7 +103,7 @@ It is also possible to limit the cache usage of individual users using [settings constraints](settings/constraints-on-settings.md). More specifically, you can restrict the maximum amount of memory (in bytes) a user may allocate in the query cache and the maximum number of stored query results. For that, first provide configurations [query_cache_max_size_in_bytes](settings/settings.md#query-cache-max-size-in-bytes) and -[query_cache_max_entries](settings/settings.md#query-cache-size-max-entries) in a user profile in `users.xml`, then make both settings +[query_cache_max_entries](settings/settings.md#query-cache-max-entries) in a user profile in `users.xml`, then make both settings readonly: ``` xml @@ -140,7 +144,7 @@ value can be specified at session, profile or query level using setting [query_c Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries). -ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#settings-max_block_size) rows. Due to filtering, aggregation, +ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. Due to filtering, aggregation, etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting [query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks are squashed (if they are tiny) or split (if they are large) into blocks of 'max_block_size' size before insertion into the query result diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index cc2692b8e02..48434d992e2 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -472,6 +472,39 @@ The value 0 means that you can delete all tables without any restrictions. ``` xml 0 ``` + + +## max\_database\_num\_to\_warn {#max-database-num-to-warn} +If the number of attached databases exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. +Default value: 1000 + +**Example** + +``` xml +50 +``` + +## max\_table\_num\_to\_warn {#max-table-num-to-warn} +If the number of attached tables exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. 
+Default value: 5000 + +**Example** + +``` xml +400 +``` + + +## max\_part\_num\_to\_warn {#max-part-num-to-warn} +If the number of active parts exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. +Default value: 100000 + +**Example** + +``` xml +400 +``` + ## max_temporary_data_on_disk_size @@ -1650,7 +1683,7 @@ Default value: `0.5`. Asynchronous loading of databases and tables. -If `true` all non-system databases with `Ordinary`, `Atomic` and `Replicated` engine will be loaded asynchronously after the ClickHouse server start up. See `system.async_loader` table, `tables_loader_background_pool_size` and `tables_loader_foreground_pool_size` server settings. Any query that tries to access a table, that is not yet loaded, will wait for exactly this table to be started up. If load job fails, query will rethrow an error (instead of shutting down the whole server in case of `async_load_databases = false`). The table that is waited for by at least one query will be loaded with higher priority. DDL queries on a database will wait for exactly that database to be started up. +If `true` all non-system databases with `Ordinary`, `Atomic` and `Replicated` engine will be loaded asynchronously after the ClickHouse server start up. See `system.asynchronous_loader` table, `tables_loader_background_pool_size` and `tables_loader_foreground_pool_size` server settings. Any query that tries to access a table, that is not yet loaded, will wait for exactly this table to be started up. If load job fails, query will rethrow an error (instead of shutting down the whole server in case of `async_load_databases = false`). The table that is waited for by at least one query will be loaded with higher priority. DDL queries on a database will wait for exactly that database to be started up. If `false`, all databases are loaded when the server starts. @@ -1976,7 +2009,7 @@ Data for the query cache is allocated in DRAM. If memory is scarce, make sure to ## query_thread_log {#query_thread_log} -Setting for logging threads of queries received with the [log_query_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting. +Setting for logging threads of queries received with the [log_query_threads=1](../../operations/settings/settings.md#log-query-threads) setting. Queries are logged in the [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). @@ -2018,7 +2051,7 @@ If the table does not exist, ClickHouse will create it. If the structure of the ## query_views_log {#query_views_log} -Setting for logging views (live, materialized etc) dependant of queries received with the [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views) setting. +Setting for logging views (live, materialized etc) dependant of queries received with the [log_query_views=1](../../operations/settings/settings.md#log-query-views) setting. Queries are logged in the [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). 
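
As an editorial aside, a minimal sketch of how the logged view information can be inspected once `log_query_views` is enabled (the inserted table is hypothetical; the column names follow the `system.query_views_log` documentation and are an assumption here, not part of the original patch):

```sql
-- Enable view logging for the current session, run a query that touches a
-- dependent (materialized) view, then flush and inspect the log.
SET log_query_views = 1;
INSERT INTO source_table VALUES (1);  -- hypothetical table with a dependent materialized view
SYSTEM FLUSH LOGS;
SELECT view_name, view_type, status
FROM system.query_views_log
ORDER BY event_time DESC
LIMIT 5;
```
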
@@ -2298,7 +2331,7 @@ For the value of the `incl` attribute, see the section “[Configuration files]( **See Also** -- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) +- [skip_unavailable_shards](../../operations/settings/settings.md#skip_unavailable_shards) - [Cluster Discovery](../../operations/cluster-discovery.md) - [Replicated database engine](../../engines/database-engines/replicated.md) diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 9e36aa26946..1cb7ec9dced 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -139,7 +139,7 @@ Limit on the number of bytes in the result. The same as the previous setting. What to do if the volume of the result exceeds one of the limits: ‘throw’ or ‘break’. By default, throw. -Using ‘break’ is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that amount of returned rows is greater than [max_result_rows](#setting-max_result_rows), multiple of [max_block_size](../../operations/settings/settings.md#setting-max_block_size) and depends on [max_threads](../../operations/settings/settings.md#settings-max_threads). +Using ‘break’ is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that amount of returned rows is greater than [max_result_rows](#setting-max_result_rows), multiple of [max_block_size](../../operations/settings/settings.md#setting-max_block_size) and depends on [max_threads](../../operations/settings/settings.md#max_threads). Example: diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 344e6dda680..3d76bd9df73 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1130,6 +1130,13 @@ Result a 0 1971-01-01 ``` +## input_format_csv_try_infer_numbers_from_strings {#input_format_csv_try_infer_numbers_from_strings} + +If enabled, during schema inference ClickHouse will try to infer numbers from string fields. +It can be useful if CSV data contains quoted UInt64 numbers. + +Disabled by default. + ## Values format settings {#values-format-settings} ### input_format_values_interpret_expressions {#input_format_values_interpret_expressions} diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index 1f41eafd02e..96477f777a9 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -4,7 +4,7 @@ sidebar_position: 63 sidebar_label: User Settings --- -# User Settings +# Users and Roles Settings The `users` section of the `user.xml` configuration file contains user settings. @@ -187,3 +187,34 @@ The following configuration forces that user `user1` can only see the rows of `t ``` The `filter` can be any expression resulting in a [UInt8](../../sql-reference/data-types/int-uint.md)-type value. It usually contains comparisons and logical operators. Rows from `database_name.table1` where filter results to 0 are not returned for this user. The filtering is incompatible with `PREWHERE` operations and disables `WHERE→PREWHERE` optimization. + +## Roles + +You can create any predefined roles using the `roles` section of the `user.xml` configuration file. 
+ +Structure of the `roles` section: + +```xml + + + + GRANT SHOW ON *.* + REVOKE SHOW ON system.* + GRANT CREATE ON *.* WITH GRANT OPTION + + + +``` + +These roles can also be granted to users from the `users` section: + +```xml + + + ... + + GRANT test_role + + + +``` diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c8d54d76704..6e087467bb9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -460,6 +460,12 @@ Possible values: Default value: 1048576. +## http_make_head_request {#http-make-head-request} + +The `http_make_head_request` setting allows the execution of a `HEAD` request while reading data from HTTP to retrieve information about the file to be read, such as its size. Since it's enabled by default, it may be desirable to disable this setting in cases where the server does not support `HEAD` requests. + +Default value: `true`. + ## table_function_remote_max_addresses {#table_function_remote_max_addresses} Sets the maximum number of addresses generated from patterns for the [remote](../../sql-reference/table-functions/remote.md) function. @@ -1578,9 +1584,15 @@ Default value: `default`. ## allow_experimental_parallel_reading_from_replicas -If true, ClickHouse will send a SELECT query to all replicas of a table (up to `max_parallel_replicas`) . It will work for any kind of MergeTree table. +Enables or disables sending SELECT queries to all replicas of a table (up to `max_parallel_replicas`). Reading is parallelized and coordinated dynamically. It will work for any kind of MergeTree table. -Default value: `false`. +Possible values: + +- 0 - Disabled. +- 1 - Enabled, silently disabled in case of failure. +- 2 - Enabled, throws an exception in case of failure. + +Default value: `0`. ## compile_expressions {#compile-expressions} @@ -1704,7 +1716,7 @@ Default value: `1` ## query_cache_squash_partial_results {#query-cache-squash-partial-results} -Squash partial result blocks to blocks of size [max_block_size](#setting-max_block_size). Reduces performance of inserts into the [query cache](../query-cache.md) but improves the compressability of cache entries (see [query_cache_compress-entries](#query_cache_compress_entries)). +Squash partial result blocks to blocks of size [max_block_size](#setting-max_block_size). Reduces performance of inserts into the [query cache](../query-cache.md) but improves the compressability of cache entries (see [query_cache_compress-entries](#query-cache-compress-entries)). Possible values: @@ -2474,7 +2486,7 @@ See also: - [load_balancing](#load_balancing-round_robin) - [Table engine Distributed](../../engines/table-engines/special/distributed.md) - [distributed_replica_error_cap](#distributed_replica_error_cap) -- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life) +- [distributed_replica_error_half_life](#distributed_replica_error_half_life) ## distributed_background_insert_sleep_time_ms {#distributed_background_insert_sleep_time_ms} @@ -2647,7 +2659,7 @@ Default value: 0. ## input_format_parallel_parsing {#input-format-parallel-parsing} -Enables or disables order-preserving parallel parsing of data formats. Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TKSV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats. +Enables or disables order-preserving parallel parsing of data formats. 
Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats. Possible values: @@ -2658,7 +2670,7 @@ Default value: `1`. ## output_format_parallel_formatting {#output-format-parallel-formatting} -Enables or disables parallel formatting of data formats. Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TKSV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats. +Enables or disables parallel formatting of data formats. Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats. Possible values: @@ -4152,6 +4164,41 @@ Result: └─────┴─────┴───────┘ ``` +## enable_order_by_all {#enable-order-by-all} + +Enables or disables sorting by `ALL` columns, i.e. [ORDER BY](../../sql-reference/statements/select/order-by.md) + +Possible values: + +- 0 — Disable ORDER BY ALL. +- 1 — Enable ORDER BY ALL. + +Default value: `1`. + +**Example** + +Query: + +```sql +CREATE TABLE TAB(C1 Int, C2 Int, ALL Int) ENGINE=Memory(); + +INSERT INTO TAB VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20); + +SELECT * FROM TAB ORDER BY ALL; -- returns an error that ALL is ambiguous + +SELECT * FROM TAB ORDER BY ALL SETTINGS enable_order_by_all; +``` + +Result: + +```text +┌─C1─┬─C2─┬─ALL─┐ +│ 20 │ 20 │ 10 │ +│ 30 │ 10 │ 20 │ +│ 10 │ 20 │ 30 │ +└────┴────┴─────┘ +``` + ## splitby_max_substrings_includes_remaining_string {#splitby_max_substrings_includes_remaining_string} Controls whether function [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with argument `max_substrings` > 0 will include the remaining string in the last element of the result array. @@ -4349,6 +4396,8 @@ Default value: `1GiB`. ## Schema Inference settings +See [schema inference](../../interfaces/schema-inference.md#schema-inference-modes) documentation for more details. + ### schema_inference_use_cache_for_file {schema_inference_use_cache_for_file} Enable schemas cache for schema inference in `file` table function. @@ -4390,6 +4439,13 @@ Possible values: Default value: 2. +### schema_inference_mode {schema_inference_mode} + +The mode of schema inference. Possible values: `default` and `union`. +See [schema inference modes](../../interfaces/schema-inference.md#schema-inference-modes) section for more details. + +Default value: `default`. + ## compatibility {#compatibility} The `compatibility` setting causes ClickHouse to use the default settings of a previous version of ClickHouse, where the previous version is provided as the setting. @@ -4659,7 +4715,7 @@ Possible values: Default value: `false`. -## rename_files_after_processing +## rename_files_after_processing {#rename_files_after_processing} - **Type:** String @@ -5078,3 +5134,25 @@ When set to `true` than for all s3 requests first two attempts are made with low When set to `false` than all attempts are made with identical timeouts. Default value: `true`. + +## max_partition_size_to_drop + +Restriction on dropping partitions in query time. + +Default value: 50 GB. +The value 0 means that you can drop partitions without any restrictions. 
+ +:::note +This query setting overwrites its server setting equivalent, see [max_partition_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-partition-size-to-drop) +::: + +## max_table_size_to_drop + +Restriction on deleting tables in query time. + +Default value: 50 GB. +The value 0 means that you can delete all tables without any restrictions. + +:::note +This query setting overwrites its server setting equivalent, see [max_table_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-table-size-to-drop) +::: \ No newline at end of file diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 796f65a9d30..b3ef1128c42 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -196,7 +196,7 @@ These settings should be defined in the disk configuration section. - `max_elements` - a limit for a number of cache files. Default: `10000000`. -- `load_metadata_threads` - number of threads being used to load cache metadata on starting time. Default: `1`. +- `load_metadata_threads` - number of threads being used to load cache metadata on starting time. Default: `16`. File Cache **query/profile settings**: diff --git a/docs/en/operations/system-tables/asynchronous_insert_log.md b/docs/en/operations/system-tables/asynchronous_insert_log.md index c3aaa8e6c41..d5f6ab07b10 100644 --- a/docs/en/operations/system-tables/asynchronous_insert_log.md +++ b/docs/en/operations/system-tables/asynchronous_insert_log.md @@ -13,6 +13,7 @@ ClickHouse does not delete data from the table automatically. See [Introduction] Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the async insert happened. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the async insert finished execution. - `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the async insert finished execution with microseconds precision. @@ -42,6 +43,7 @@ SELECT * FROM system.asynchronous_insert_log LIMIT 1 \G; Result: ``` text +hostname: clickhouse.eu-central1.internal event_date: 2023-06-08 event_time: 2023-06-08 10:08:53 event_time_microseconds: 2023-06-08 10:08:53.199516 diff --git a/docs/en/operations/system-tables/async_loader.md b/docs/en/operations/system-tables/asynchronous_loader.md similarity index 97% rename from docs/en/operations/system-tables/async_loader.md rename to docs/en/operations/system-tables/asynchronous_loader.md index 4e8651a6d3e..af9aa4ecd09 100644 --- a/docs/en/operations/system-tables/async_loader.md +++ b/docs/en/operations/system-tables/asynchronous_loader.md @@ -1,7 +1,7 @@ --- -slug: /en/operations/system-tables/async_loader +slug: /en/operations/system-tables/asynchronous_loader --- -# async_loader +# asynchronous_loader Contains information and status for recent asynchronous jobs (e.g. for tables loading). The table contains a row for every job. There is a tool for visualizing information from this table `utils/async_loader_graph`. 
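
Besides the single-row example shown in the next hunk, a summary query over this table can be sketched roughly as follows (the `pool` and `status` column names are assumptions about the table layout, not confirmed by the patch):

```sql
-- Rough overview of asynchronous load jobs grouped by pool and status.
SELECT pool, status, count() AS jobs
FROM system.asynchronous_loader
GROUP BY pool, status
ORDER BY pool, status;
```
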
@@ -9,7 +9,7 @@ Example: ``` sql SELECT * -FROM system.async_loader +FROM system.asynchronous_loader FORMAT Vertical LIMIT 1 ``` diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index efe57a202d8..65b2e349707 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -7,6 +7,7 @@ Contains the historical values for `system.asynchronous_metrics`, which are save Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. - `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. @@ -15,22 +16,33 @@ Columns: **Example** ``` sql -SELECT * FROM system.asynchronous_metric_log LIMIT 10 +SELECT * FROM system.asynchronous_metric_log LIMIT 3 \G ``` ``` text -┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬─────value─┐ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ CPUFrequencyMHz_0 │ 2120.9 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pmuzzy │ 743 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pdirty │ 26288 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.run_intervals │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.num_runs │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.retained │ 60694528 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.mapped │ 303161344 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.resident │ 260931584 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.metadata │ 12079488 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.allocated │ 133756128 │ -└────────────┴─────────────────────┴──────────────────────────────────────────┴───────────┘ +Row 1: +────── +hostname: clickhouse.eu-central1.internal +event_date: 2023-11-14 +event_time: 2023-11-14 14:39:07 +metric: AsynchronousHeavyMetricsCalculationTimeSpent +value: 0.001 + +Row 2: +────── +hostname: clickhouse.eu-central1.internal +event_date: 2023-11-14 +event_time: 2023-11-14 14:39:08 +metric: AsynchronousHeavyMetricsCalculationTimeSpent +value: 0 + +Row 3: +────── +hostname: clickhouse.eu-central1.internal +event_date: 2023-11-14 +event_time: 2023-11-14 14:39:09 +metric: AsynchronousHeavyMetricsCalculationTimeSpent +value: 0 ``` **See Also** diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index e46b495239c..fe8f963b1ec 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -239,6 +239,10 @@ The amount of virtual memory mapped for the pages of machine code of the server The amount of virtual memory mapped for the use of stack and for the allocated memory, in bytes. It is unspecified whether it includes the per-thread stacks and most of the allocated memory, that is allocated with the 'mmap' system call. This metric exists only for completeness reasons. I recommend to use the `MemoryResident` metric for monitoring. +### MemoryResidentMax + +Maximum amount of physical memory used by the server process, in bytes. + ### MemoryResident The amount of physical memory used by the server process, in bytes. 
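
Both of the memory metrics described above are exposed through `system.asynchronous_metrics`; as a quick illustration (a sketch, not part of the original page):

```sql
-- Compare the current resident memory with the maximum observed since startup.
SELECT metric, formatReadableSize(value) AS size
FROM system.asynchronous_metrics
WHERE metric IN ('MemoryResident', 'MemoryResidentMax');
```
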
@@ -547,6 +551,14 @@ Total amount of bytes (compressed, including data and indices) stored in all tab Total amount of data parts in all tables of MergeTree family. Numbers larger than 10 000 will negatively affect the server startup time and it may indicate unreasonable choice of the partition key. +### TotalPrimaryKeyBytesInMemory + +The total amount of memory (in bytes) used by primary key values (only takes active parts into account). + +### TotalPrimaryKeyBytesInMemoryAllocated + +The total amount of memory (in bytes) reserved for primary key values (only takes active parts into account). + ### TotalRowsOfMergeTreeTables Total amount of rows (records) stored in all tables of MergeTree family. diff --git a/docs/en/operations/system-tables/backup_log.md b/docs/en/operations/system-tables/backup_log.md index 7e088fcad94..c73fd26683e 100644 --- a/docs/en/operations/system-tables/backup_log.md +++ b/docs/en/operations/system-tables/backup_log.md @@ -7,6 +7,7 @@ Contains logging entries with the information about `BACKUP` and `RESTORE` opera Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the entry. - `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the entry with microseconds precision. - `id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the backup or restore operation. @@ -45,6 +46,7 @@ SELECT * FROM system.backup_log WHERE id = 'e5b74ecb-f6f1-426a-80be-872f90043885 ```response Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2023-08-19 event_time_microseconds: 2023-08-19 11:05:21.998566 id: e5b74ecb-f6f1-426a-80be-872f90043885 @@ -63,6 +65,7 @@ bytes_read: 0 Row 2: ────── +hostname: clickhouse.eu-central1.internal event_date: 2023-08-19 event_time_microseconds: 2023-08-19 11:08:56.916192 id: e5b74ecb-f6f1-426a-80be-872f90043885 @@ -93,6 +96,7 @@ SELECT * FROM system.backup_log WHERE id = 'cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 ```response Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2023-08-19 event_time_microseconds: 2023-08-19 11:09:19.718077 id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 @@ -111,6 +115,7 @@ bytes_read: 0 Row 2: ────── +hostname: clickhouse.eu-central1.internal event_date: 2023-08-19 event_time_microseconds: 2023-08-19 11:09:29.334234 id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index 2659f80e338..63cc083e4bc 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -78,5 +78,5 @@ is_active: NULL **See Also** - [Table engine Distributed](../../engines/table-engines/special/distributed.md) -- [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap) -- [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) +- [distributed_replica_error_cap setting](../../operations/settings/settings.md#distributed_replica_error_cap) +- [distributed_replica_error_half_life setting](../../operations/settings/settings.md#distributed_replica_error_half_life) diff --git a/docs/en/operations/system-tables/crash-log.md b/docs/en/operations/system-tables/crash-log.md index 4d015a513a2..e83da3624b2 100644 --- 
a/docs/en/operations/system-tables/crash-log.md +++ b/docs/en/operations/system-tables/crash-log.md @@ -7,6 +7,7 @@ Contains information about stack traces for fatal errors. The table does not exi Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date of the event. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the event. - `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the event with nanoseconds. @@ -32,6 +33,7 @@ Result (not full): ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2020-10-14 event_time: 2020-10-14 15:47:40 timestamp_ns: 1602679660271312710 diff --git a/docs/en/operations/system-tables/database_engines.md b/docs/en/operations/system-tables/database_engines.md new file mode 100644 index 00000000000..09f0687af65 --- /dev/null +++ b/docs/en/operations/system-tables/database_engines.md @@ -0,0 +1,26 @@ +--- +slug: /en/operations/system-tables/database_engines +--- +# database_engines + +Contains the list of database engines supported by the server. + +This table contains the following columns (the column type is shown in brackets): + +- `name` (String) — The name of database engine. + +Example: + +``` sql +SELECT * +FROM system.database_engines +WHERE name in ('Atomic', 'Lazy', 'Ordinary') +``` + +``` text +┌─name─────┐ +│ Ordinary │ +│ Atomic │ +│ Lazy │ +└──────────┘ +``` diff --git a/docs/en/operations/system-tables/errors.md b/docs/en/operations/system-tables/errors.md index 01762962152..4582ea631b3 100644 --- a/docs/en/operations/system-tables/errors.md +++ b/docs/en/operations/system-tables/errors.md @@ -9,11 +9,15 @@ Columns: - `name` ([String](../../sql-reference/data-types/string.md)) — name of the error (`errorCodeToName`). - `code` ([Int32](../../sql-reference/data-types/int-uint.md)) — code number of the error. -- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — the number of times this error has been happened. -- `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — time when the last error happened. +- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — the number of times this error happened. +- `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — the time when the last error happened. - `last_error_message` ([String](../../sql-reference/data-types/string.md)) — message for the last error. -- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored. -- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — remote exception (i.e. received during one of the distributed query). +- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) that represents a list of physical addresses where the called methods are stored. +- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — remote exception (i.e. received during one of the distributed queries). + +:::note +Counters for some errors may increase during successful query execution. It's not recommended to use this table for server monitoring purposes unless you are sure that corresponding error can not be a false positive. 
+::: **Example** diff --git a/docs/en/operations/system-tables/metric_log.md b/docs/en/operations/system-tables/metric_log.md index 9ea0dde3f80..f0b717a3bbf 100644 --- a/docs/en/operations/system-tables/metric_log.md +++ b/docs/en/operations/system-tables/metric_log.md @@ -6,6 +6,7 @@ slug: /en/operations/system-tables/metric_log Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk. Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. - `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. @@ -19,6 +20,7 @@ SELECT * FROM system.metric_log LIMIT 1 FORMAT Vertical; ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2020-09-05 event_time: 2020-09-05 16:22:33 event_time_microseconds: 2020-09-05 16:22:33.196807 diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md index a605a46c14c..5f03c2f8ada 100644 --- a/docs/en/operations/system-tables/opentelemetry_span_log.md +++ b/docs/en/operations/system-tables/opentelemetry_span_log.md @@ -8,28 +8,19 @@ Contains information about [trace spans](https://opentracing.io/docs/overview/sp Columns: - `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — ID of the trace for executed query. - - `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`. - - `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`. - - `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation. - - `kind` ([Enum8](../../sql-reference/data-types/enum.md)) — The [SpanKind](https://opentelemetry.io/docs/reference/specification/trace/api/#spankind) of the span. - `INTERNAL` — Indicates that the span represents an internal operation within an application. - `SERVER` — Indicates that the span covers server-side handling of a synchronous RPC or other remote request. - `CLIENT` — Indicates that the span describes a request to some remote service. - `PRODUCER` — Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts. - `CONSUMER` - Indicates that the span describes a child of an asynchronous PRODUCER request. - - `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds). - - `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds). - - `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`. - - `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard. 
- - `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard. **Example** diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md index b9185434e01..af582646653 100644 --- a/docs/en/operations/system-tables/part_log.md +++ b/docs/en/operations/system-tables/part_log.md @@ -9,6 +9,7 @@ This table contains information about events that occurred with [data parts](../ The `system.part_log` table contains the following columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part. - `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values: - `NewPart` — Inserting of a new data part. @@ -56,13 +57,14 @@ SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical; ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31 event_type: NewPart merge_reason: NotAMerge merge_algorithm: Undecided event_date: 2021-02-02 event_time: 2021-02-02 11:14:28 -event_time_microseconds: 2021-02-02 11:14:28.861919 +event_time_microseconds: 2021-02-02 11:14:28.861919 duration_ms: 35 database: default table: log_mt_2 diff --git a/docs/en/operations/system-tables/processors_profile_log.md b/docs/en/operations/system-tables/processors_profile_log.md index 5eedb5a5dae..3c2a090efe3 100644 --- a/docs/en/operations/system-tables/processors_profile_log.md +++ b/docs/en/operations/system-tables/processors_profile_log.md @@ -4,6 +4,7 @@ This table contains profiling on processors level (that you can find in [`EXPLAI Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the event happened. - `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time with microseconds precision when the event happened. diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index ced97166702..7fcc4928355 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -11,7 +11,7 @@ This table does not contain the ingested data for `INSERT` queries. You can change settings of queries logging in the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration. -You can disable queries logging by setting [log_queries = 0](../../operations/settings/settings.md#settings-log-queries). We do not recommend to turn off logging because information in this table is important for solving issues. +You can disable queries logging by setting [log_queries = 0](../../operations/settings/settings.md#log-queries). We do not recommend to turn off logging because information in this table is important for solving issues. 
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. @@ -30,10 +30,11 @@ Each query creates one or two rows in the `query_log` table, depending on the st You can use the [log_queries_probability](../../operations/settings/settings.md#log-queries-probability) setting to reduce the number of queries, registered in the `query_log` table. -You can use the [log_formatted_queries](../../operations/settings/settings.md#settings-log-formatted-queries) setting to log formatted queries to the `formatted_query` column. +You can use the [log_formatted_queries](../../operations/settings/settings.md#log-formatted-queries) setting to log formatted queries to the `formatted_query` column. Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of an event that occurred when executing the query. Values: - `'QueryStart' = 1` — Successful start of query execution. - `'QueryFinish' = 2` — Successful end of query execution. @@ -100,7 +101,7 @@ Columns: - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. - `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events) - `Settings` ([Map(String, String)](../../sql-reference/data-types/map.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1. -- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined. +- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#max_query_size). An empty string if it is not defined. - `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution. These threads may not have run simultaneously. - `peak_threads_usage` ([UInt64)](../../sql-reference/data-types/int-uint.md)) — Maximum count of simultaneous threads executing the query. - `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution. 
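
As a side note to the `log_comment` column described above, a common pattern is to tag a query with a comment and then locate it in the log; a minimal sketch (the comment string is arbitrary):

```sql
-- Tag a query, flush the logs, then find it in query_log by its comment.
SELECT 1 SETTINGS log_comment = 'docs-example';
SYSTEM FLUSH LOGS;
SELECT event_time, query_duration_ms, query
FROM system.query_log
WHERE log_comment = 'docs-example' AND type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 1;
```
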
@@ -127,6 +128,7 @@ SELECT * FROM system.query_log WHERE type = 'QueryFinish' ORDER BY query_start_t ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal type: QueryFinish event_date: 2021-11-03 event_time: 2021-11-03 16:13:54 @@ -167,7 +169,7 @@ initial_query_start_time: 2021-11-03 16:13:54 initial_query_start_time_microseconds: 2021-11-03 16:13:54.952325 interface: 1 os_user: sevirov -client_hostname: clickhouse.ru-central1.internal +client_hostname: clickhouse.eu-central1.internal client_name: ClickHouse client_revision: 54449 client_version_major: 21 diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index a6d5632ade9..0420a0392f2 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -8,7 +8,7 @@ Contains information about threads that execute queries, for example, thread nam To start logging: 1. Configure parameters in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section. -2. Set [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) to 1. +2. Set [log_query_threads](../../operations/settings/settings.md#log-query-threads) to 1. The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. @@ -18,6 +18,7 @@ You can use the [log_queries_probability](../../operations/settings/settings.md# Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query. - `event_time_microsecinds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision. @@ -74,6 +75,7 @@ Columns: ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2020-09-11 event_time: 2020-09-11 10:08:17 event_time_microseconds: 2020-09-11 10:08:17.134042 diff --git a/docs/en/operations/system-tables/query_views_log.md b/docs/en/operations/system-tables/query_views_log.md index e107e4f926c..41a69da70aa 100644 --- a/docs/en/operations/system-tables/query_views_log.md +++ b/docs/en/operations/system-tables/query_views_log.md @@ -8,7 +8,7 @@ Contains information about the dependent views executed when running a query, fo To start logging: 1. Configure parameters in the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) section. -2. Set [log_query_views](../../operations/settings/settings.md#settings-log-query-views) to 1. +2. Set [log_query_views](../../operations/settings/settings.md#log-query-views) to 1. 
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. @@ -18,6 +18,7 @@ You can use the [log_queries_probability](../../operations/settings/settings.md# Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the last event of the view happened. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the view finished execution. - `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the view finished execution with microseconds precision. @@ -59,6 +60,7 @@ Result: ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2021-06-22 event_time: 2021-06-22 13:23:07 event_time_microseconds: 2021-06-22 13:23:07.738221 diff --git a/docs/en/operations/system-tables/session_log.md b/docs/en/operations/system-tables/session_log.md index 5b1a2b2a489..5c6096b3adf 100644 --- a/docs/en/operations/system-tables/session_log.md +++ b/docs/en/operations/system-tables/session_log.md @@ -7,6 +7,7 @@ Contains information about all successful and failed login and logout events. Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Login/logout result. Possible values: - `LoginFailure` — Login error. - `LoginSuccess` — Successful login. @@ -57,6 +58,7 @@ Result: ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal type: LoginSuccess auth_id: 45e6bd83-b4aa-4a23-85e6-bd83b4aa1a23 session_id: diff --git a/docs/en/operations/system-tables/storage_policies.md b/docs/en/operations/system-tables/storage_policies.md index 5c7184b2b22..24271a943a4 100644 --- a/docs/en/operations/system-tables/storage_policies.md +++ b/docs/en/operations/system-tables/storage_policies.md @@ -17,7 +17,7 @@ Columns: - `UNKNOWN` - `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit). - `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of configuration parameter, ClickHouse start to move data to the next volume in order. -- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `prefer_not_to_merge` setting. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. +- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `prefer_not_to_merge` setting. Should be always false. When this setting is enabled, you did a mistake. - `perform_ttl_move_on_insert` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `perform_ttl_move_on_insert` setting. — Disables TTL move on data part INSERT. By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. 
This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3). - `load_balancing` ([Enum8](../../sql-reference/data-types/enum.md)) — Policy for disk balancing. Can have one of the following values: - `ROUND_ROBIN` diff --git a/docs/en/operations/system-tables/table_engines.md b/docs/en/operations/system-tables/table_engines.md index 08594739ecf..56668abae31 100644 --- a/docs/en/operations/system-tables/table_engines.md +++ b/docs/en/operations/system-tables/table_engines.md @@ -14,7 +14,7 @@ This table contains the following columns (the column type is shown in brackets) - `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`. - `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../../engines/table-engines/mergetree-family/replication.md). - `supports_duduplication` (UInt8) — Flag that indicates if table engine supports data deduplication. -- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#settings-max-insert-threads) setting). +- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#max-insert-threads) setting). Example: diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index e4461e14236..8049ab091c0 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -29,7 +29,7 @@ Columns: - `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies. -- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table). +- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([materialized views](../../sql-reference/statements/create/view.md#materialized-view) the current table). - `create_table_query` ([String](../../sql-reference/data-types/string.md)) - The query that was used to create the table. @@ -57,6 +57,8 @@ Columns: - If the table stores data on disk, returns used space on disk (i.e. compressed). - If the table stores data in memory, returns approximated number of used bytes in memory. +- `total_bytes_uncompressed` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of uncompressed bytes, if it's possible to quickly determine the exact number of bytes from the part checksums for the table on storage, otherwise `NULL` (does not take underlying storage (if any) into account). + - `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables). - `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes INSERTed since server start (only for `Buffer` tables). 
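
To illustrate the new `total_bytes_uncompressed` column alongside the existing `total_bytes`, a hedged example query (the MergeTree filter is an illustrative choice; either column may be `NULL` where the value cannot be determined quickly):

```sql
-- Rough per-table compression overview.
SELECT
    database,
    name,
    formatReadableSize(total_bytes) AS compressed,
    formatReadableSize(total_bytes_uncompressed) AS uncompressed
FROM system.tables
WHERE engine LIKE '%MergeTree'
ORDER BY total_bytes DESC
LIMIT 10;
```
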
diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md index 897cefab0be..6ac1ddbf667 100644 --- a/docs/en/operations/system-tables/text_log.md +++ b/docs/en/operations/system-tables/text_log.md @@ -7,6 +7,7 @@ Contains logging entries. The logging level which goes to this table can be limi Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` (Date) — Date of the entry. - `event_time` (DateTime) — Time of the entry. - `event_time_microseconds` (DateTime) — Time of the entry with microseconds precision. @@ -39,6 +40,7 @@ SELECT * FROM system.text_log LIMIT 1 \G ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2020-09-10 event_time: 2020-09-10 11:23:07 event_time_microseconds: 2020-09-10 11:23:07.871397 diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index 983b5b5c176..5adc33de37f 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -12,37 +12,27 @@ To analyze logs, use the `addressToLine`, `addressToLineWithInlines`, `addressTo Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of sampling moment. - - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment. - - `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Timestamp of the sampling moment with microseconds precision. - - `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds. - - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision. When connecting to the server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1.`. This field contains the `revision`, but not the `version` of a server. - `trace_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Trace type: - - `Real` represents collecting stack traces by wall-clock time. - `CPU` represents collecting stack traces by CPU time. - `Memory` represents collecting allocations and deallocations when memory allocation exceeds the subsequent watermark. - `MemorySample` represents collecting random allocations and deallocations. - `MemoryPeak` represents collecting updates of peak memory usage. - `ProfileEvent` represents collecting of increments of profile events. - - `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier. - - `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](#system_tables-query_log) system table. - - `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process. - - `size` ([Int64](../../sql-reference/data-types/int-uint.md)) - For trace types `Memory`, `MemorySample` or `MemoryPeak` is the amount of memory allocated, for other trace types is 0. 
- - `event` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) - For trace type `ProfileEvent` is the name of updated profile event, for other trace types is an empty string. - - `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of increment of profile event, for other trace types is 0. **Example** @@ -54,6 +44,7 @@ SELECT * FROM system.trace_log LIMIT 1 \G ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2020-09-10 event_time: 2020-09-10 11:23:09 event_time_microseconds: 2020-09-10 11:23:09.872924 diff --git a/docs/en/operations/system-tables/view_refreshes.md b/docs/en/operations/system-tables/view_refreshes.md new file mode 100644 index 00000000000..12377507b39 --- /dev/null +++ b/docs/en/operations/system-tables/view_refreshes.md @@ -0,0 +1,43 @@ +--- +slug: /en/operations/system-tables/view_refreshes +--- +# view_refreshes + +Information about [Refreshable Materialized Views](../../sql-reference/statements/create/view.md#refreshable-materialized-view). Contains all refreshable materialized views, regardless of whether there's a refresh in progress or not. + + +Columns: + +- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in. +- `view` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `status` ([String](../../sql-reference/data-types/string.md)) — Current state of the refresh. +- `last_refresh_result` ([String](../../sql-reference/data-types/string.md)) — Outcome of the latest refresh attempt. +- `last_refresh_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the last refresh attempt. `NULL` if no refresh attempts happened since server startup or table creation. +- `last_success_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the last successful refresh. `NULL` if no successful refreshes happened since server startup or table creation. +- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — How long the last refresh attempt took. +- `next_refresh_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time at which the next refresh is scheduled to start. +- `remaining_dependencies` ([Array(String)](../../sql-reference/data-types/array.md)) — If the view has [refresh dependencies](../../sql-reference/statements/create/view.md#refresh-dependencies), this array contains the subset of those dependencies that are not satisfied for the current refresh yet. If `status = 'WaitingForDependencies'`, a refresh is ready to start as soon as these dependencies are fulfilled. +- `exception` ([String](../../sql-reference/data-types/string.md)) — if `last_refresh_result = 'Exception'`, i.e. the last refresh attempt failed, this column contains the corresponding error message and stack trace. +- `refresh_count` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of successful refreshes since last server restart or table creation. +- `progress` ([Float64](../../sql-reference/data-types/float.md)) — Progress of the current refresh, between 0 and 1. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of rows read by the current refresh so far. +- `total_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Estimated total number of rows that need to be read by the current refresh. + +(There are additional columns related to current refresh progress, but they are currently unreliable.) 
+ +**Example** + +```sql +SELECT + database, + view, + status, + last_refresh_result, + last_refresh_time, + next_refresh_time +FROM system.view_refreshes + +┌─database─┬─view───────────────────────┬─status────┬─last_refresh_result─┬───last_refresh_time─┬───next_refresh_time─┐ +│ default │ hello_documentation_reader │ Scheduled │ Finished │ 2023-12-01 01:24:00 │ 2023-12-01 01:25:00 │ +└──────────┴────────────────────────────┴───────────┴─────────────────────┴─────────────────────┴─────────────────────┘ +``` diff --git a/docs/en/operations/system-tables/zookeeper_log.md b/docs/en/operations/system-tables/zookeeper_log.md index dce5be29f62..dd2df2ba5fc 100644 --- a/docs/en/operations/system-tables/zookeeper_log.md +++ b/docs/en/operations/system-tables/zookeeper_log.md @@ -9,6 +9,7 @@ For requests, only columns with request parameters are filled in, and the remain Columns with request parameters: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `type` ([Enum](../../sql-reference/data-types/enum.md)) — Event type in the ZooKeeper client. Can have one of the following values: - `Request` — The request has been sent. - `Response` — The response was received. @@ -63,6 +64,7 @@ Result: ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal type: Request event_date: 2021-08-09 event_time: 2021-08-09 21:38:30.291792 diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 8f6cf6ad147..757afff599c 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -93,7 +93,7 @@ While ClickHouse can work over NFS, it is not the best idea. ## Linux Kernel {#linux-kernel} -Don’t use an outdated Linux kernel. +Don't use an outdated Linux kernel. ## Network {#network} diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index 8620b44c368..8b7d7f85552 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -53,7 +53,6 @@ clickhouse-benchmark [keys] < queries_file; - `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren’t different with the selected level of confidence. - `--cumulative` — Printing cumulative data instead of data per interval. - `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`. -- `--json=FILEPATH` — `JSON` output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON-file. - `--user=USERNAME` — ClickHouse user name. Default value: `default`. - `--password=PSWD` — ClickHouse user password. Default value: empty string. - `--stacktrace` — Stack traces output. When the key is set, `clickhouse-bencmark` outputs stack traces of exceptions. 
diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 5fbbf8f723c..3654cd157e9 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -487,24 +487,23 @@ Where: ## uniqUpTo(N)(x) -Calculates the number of different argument values if it is less than or equal to N. If the number of different argument values is greater than N, it returns N + 1. +Calculates the number of different values of the argument up to a specified limit, `N`. If the number of different argument values is greater than `N`, this function returns `N` + 1, otherwise it calculates the exact value. -Recommended for use with small Ns, up to 10. The maximum value of N is 100. +Recommended for use with small `N`s, up to 10. The maximum value of `N` is 100. -For the state of an aggregate function, it uses the amount of memory equal to 1 + N \* the size of one value of bytes. -For strings, it stores a non-cryptographic hash of 8 bytes. That is, the calculation is approximated for strings. +For the state of an aggregate function, this function uses the amount of memory equal to 1 + `N` \* the size of one value, in bytes. +When dealing with strings, this function stores a non-cryptographic hash of 8 bytes; the calculation is approximated for strings. -The function also works for several arguments. +For example, suppose you have a table that logs every search query made by users on your website. Each row in the table represents a single search query, with columns for the user ID, the search query, and the timestamp of the query. You can use `uniqUpTo` to generate a report that shows only the keywords that produced at least 5 unique users: -It works as fast as possible, except for cases when a large N value is used and the number of unique values is slightly less than N. - -Usage example: - -``` text -Problem: Generate a report that shows only keywords that produced at least 5 unique users. -Solution: Write in the GROUP BY query SearchPhrase HAVING uniqUpTo(4)(UserID) >= 5 +```sql +SELECT SearchPhrase +FROM SearchLog +GROUP BY SearchPhrase +HAVING uniqUpTo(4)(UserID) >= 5 ``` +`uniqUpTo(4)(UserID)` calculates the number of unique `UserID` values for each `SearchPhrase`, but it only counts up to 4 unique values. If there are more than 4 unique `UserID` values for a `SearchPhrase`, the function returns 5 (4 + 1). The `HAVING` clause then filters out the `SearchPhrase` values for which the number of unique `UserID` values is less than 5. This will give you a list of search keywords that were used by at least 5 unique users. ## sumMapFiltered(keys_to_keep)(keys, values) diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index a40108a331a..ca4067c8d8c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -28,7 +28,7 @@ In both cases the type of the returned value is [UInt64](../../../sql-reference/ **Details** -ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count_distinct_implementation](../../../operations/settings/settings.md#settings-count_distinct_implementation) setting.
It defines which of the [uniq\*](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) function. +ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count_distinct_implementation](../../../operations/settings/settings.md#count_distinct_implementation) setting. It defines which of the [uniq\*](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) function. The `SELECT count() FROM table` query is optimized by default using metadata from MergeTree. If you need to use row-level security, disable optimization using the [optimize_trivial_count_query](../../../operations/settings/settings.md#optimize-trivial-count-query) setting. diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md deleted file mode 100644 index cc601c097fe..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md +++ /dev/null @@ -1,48 +0,0 @@ - --- - toc_priority: 112 - --- - - # groupArraySorted {#groupArraySorted} - - Returns an array with the first N items in ascending order. - - ``` sql - groupArraySorted(N)(column) - ``` - - **Arguments** - - - `N` – The number of elements to return. - - If the parameter is omitted, default value is the size of input. - - - `column` – The value (Integer, String, Float and other Generic types). 
- - **Example** - - Gets the first 10 numbers: - - ``` sql - SELECT groupArraySorted(10)(number) FROM numbers(100) - ``` - - ``` text - ┌─groupArraySorted(10)(number)─┐ - │ [0,1,2,3,4,5,6,7,8,9] │ - └──────────────────────────────┘ - ``` - - - Gets all the String implementations of all numbers in column: - - ``` sql -SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5)); - - ``` - - ``` text - ┌─groupArraySorted(str)────────┐ - │ ['0','1','2','3','4'] │ - └──────────────────────────────┘ - ``` - \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index 3bf0e070cae..b1f2c5bacbb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -54,7 +54,6 @@ ClickHouse-specific aggregate functions: - [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md) - [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md) - [groupArraySample](./grouparraysample.md) -- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md) - [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md) - [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md) - [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md index e21dad5b2f5..62edc221858 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -1,62 +1,64 @@ ---- -slug: /en/sql-reference/aggregate-functions/reference/sparkbar -sidebar_position: 311 -sidebar_label: sparkbar ---- - -# sparkbar - -The function plots a frequency histogram for values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`. -Repetitions for all `x` falling into the same bucket are averaged, so data should be pre-aggregated. -Negative repetitions are ignored. - -If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` — as the interval end. -Otherwise, values outside the interval are ignored. - -**Syntax** - -``` sql -sparkbar(buckets[, min_x, max_x])(x, y) -``` - -**Parameters** - -- `buckets` — The number of segments. Type: [Integer](../../../sql-reference/data-types/int-uint.md). -- `min_x` — The interval start. Optional parameter. -- `max_x` — The interval end. Optional parameter. - -**Arguments** - -- `x` — The field with values. -- `y` — The field with the frequency of values. - -**Returned value** - -- The frequency histogram. 
- -**Example** - -Query: - -``` sql -CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date; - -INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11'); - -SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date); - -SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date); -``` - -Result: - -``` text -┌─sparkbar(9)(event_date, cnt)─┐ -│ ▂▅▂▃▆█ ▂ │ -└──────────────────────────────┘ - -┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐ -│ ▂▅▂▃▇▆█ │ -└──────────────────────────────────────────────────────────────────────────┘ -``` - +--- +slug: /en/sql-reference/aggregate-functions/reference/sparkbar +sidebar_position: 311 +sidebar_label: sparkbar +--- + +# sparkbar + +The function plots a frequency histogram for values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`. +Repetitions for all `x` falling into the same bucket are averaged, so data should be pre-aggregated. +Negative repetitions are ignored. + +If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` — as the interval end. +Otherwise, values outside the interval are ignored. + +**Syntax** + +``` sql +sparkbar(buckets[, min_x, max_x])(x, y) +``` + +**Parameters** + +- `buckets` — The number of segments. Type: [Integer](../../../sql-reference/data-types/int-uint.md). +- `min_x` — The interval start. Optional parameter. +- `max_x` — The interval end. Optional parameter. + +**Arguments** + +- `x` — The field with values. +- `y` — The field with the frequency of values. + +**Returned value** + +- The frequency histogram. + +**Example** + +Query: + +``` sql +CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date; + +INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11'); + +SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date); + +SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date); +``` + +Result: + +``` text +┌─sparkbar(9)(event_date, cnt)─┐ +│ ▂▅▂▃▆█ ▂ │ +└──────────────────────────────┘ + +┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐ +│ ▂▅▂▃▇▆█ │ +└──────────────────────────────────────────────────────────────────────────┘ +``` + +The alias for this function is sparkBar. 
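+
+For example, the alias should be usable in place of `sparkbar`, reusing the `spark_bar_data` table from the example above:
+
+``` sql
+SELECT sparkBar(9)(event_date, cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
+```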
+ diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index 4f021b25809..9f86aaf2502 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -394,7 +394,7 @@ Configuration example: or ``` sql -LAYOUT(HASHED_ARRAY()) +LAYOUT(HASHED_ARRAY([SHARDS 1])) ``` ### complex_key_hashed_array @@ -412,7 +412,7 @@ Configuration example: or ``` sql -LAYOUT(COMPLEX_KEY_HASHED_ARRAY()) +LAYOUT(COMPLEX_KEY_HASHED_ARRAY([SHARDS 1])) ``` ### range_hashed {#range_hashed} @@ -2415,8 +2415,8 @@ clickhouse client \ --secure \ --password MY_PASSWORD \ --query " - INSERT INTO regexp_dictionary_source_table - SELECT * FROM input ('id UInt64, parent_id UInt64, regexp String, keys Array(String), values Array(String)') + INSERT INTO regexp_dictionary_source_table + SELECT * FROM input ('id UInt64, parent_id UInt64, regexp String, keys Array(String), values Array(String)') FORMAT CSV" < regexp_dict.csv ``` diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 00efa63c960..f5da00a8663 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -143,7 +143,7 @@ range([start, ] end [, step]) **Implementation details** - All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments. -- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting. +- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#function_range_max_elements_in_block) setting. - Returns Null if any argument has Nullable(Nothing) type. An exception is thrown if any argument has Null value (Nullable(T) type). **Examples** diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 565486275e6..0261589b968 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1809,6 +1809,8 @@ Alias: `dateTrunc`. - `quarter` - `year` + `unit` argument is case-insensitive. + - `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 4816a6f0032..a23849c13aa 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -319,9 +319,9 @@ This is a relatively fast non-cryptographic hash function of average quality for Calculates a 64-bit hash code from any type of integer. It works faster than intHash32. Average quality. 
-## SHA1, SHA224, SHA256, SHA512 +## SHA1, SHA224, SHA256, SHA512, SHA512_256 -Calculates SHA-1, SHA-224, SHA-256, SHA-512 hash from a string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Calculates SHA-1, SHA-224, SHA-256, SHA-512, SHA-512-256 hash from a string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). **Syntax** @@ -1780,12 +1780,11 @@ Result: ## sqid Transforms numbers into YouTube-like short URL hash called [Sqid](https://sqids.org/). -To use this function, set setting `allow_experimental_hash_functions = 1`. **Syntax** ```sql -sqid(number1,...) +sqid(number1, ...) ``` **Arguments** diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 8cb35483555..1025b8bdc3d 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -16,7 +16,7 @@ For proper operation of introspection functions: - Install the `clickhouse-common-static-dbg` package. -- Set the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting to 1. +- Set the [allow_introspection_functions](../../operations/settings/settings.md#allow_introspection_functions) setting to 1. For security reasons introspection functions are disabled by default. diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index 33c788a632e..be20e02d77e 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -501,41 +501,3 @@ Result: │ 0 │ └────────────────────────────────────────────────────────────────────┘ ``` - -## reverseDNSQuery - -Performs a reverse DNS query to get the PTR records associated with the IP address. - -**Syntax** - -``` sql -reverseDNSQuery(address) -``` - -This function performs reverse DNS resolutions on both IPv4 and IPv6. - -**Arguments** - -- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md). - -**Returned value** - -- Associated domains (PTR records). - -Type: Type: [Array(String)](../../sql-reference/data-types/array.md). - -**Example** - -Query: - -``` sql -SELECT reverseDNSQuery('192.168.0.2'); -``` - -Result: - -``` text -┌─reverseDNSQuery('192.168.0.2')────────────┐ -│ ['test2.example.com','test3.example.com'] │ -└───────────────────────────────────────────┘ -``` diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index b2a1d5066bb..35f9c7af2ce 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -67,45 +67,7 @@ WHERE macro = 'test'; │ test │ Value │ └───────┴──────────────┘ ``` - -## getClientHTTPHeader -Returns the value of specified http header.If there is no such header or the request method is not http, it will throw an exception. -**Syntax** - -```sql -getClientHTTPHeader(name); -``` - -**Arguments** - -- `name` — HTTP header name .[String](../../sql-reference/data-types/string.md#string) - -**Returned value** - -Value of the specified header. -Type:[String](../../sql-reference/data-types/string.md#string). - - -When we use `clickhouse-client` to execute this function, we'll always get empty string, because client doesn't use http protocol. 
-```sql -SELECT getCientHTTPHeader('test') -``` -result: - -```text -┌─getClientHTTPHeader('test')─┐ -│ │ -└────────────------───────────┘ -``` -Try to use http request: -```shell -echo "select getClientHTTPHeader('X-Clickhouse-User')" | curl -H 'X-ClickHouse-User: default' -H 'X-ClickHouse-Key: ' 'http://localhost:8123/' -d @- - -#result -default -``` - ## FQDN Returns the fully qualified domain name of the ClickHouse server. @@ -666,6 +628,8 @@ SELECT formatReadableSize(filesize_bytes) AS filesize ``` +Alias: `FORMAT_BYTES`. + ``` text ┌─filesize_bytes─┬─filesize───┐ │ 1 │ 1.00 B │ @@ -2867,3 +2831,92 @@ Result: │ SELECT a, b FROM tab WHERE (a > 3) AND (b < 3) │ └─────────────────────────────────────────────────────────────────────────┘ ``` + +## minSampleSizeConversion + +Calculates minimum required sample size for an A/B test comparing conversions (proportions) in two samples. + +**Syntax** + +``` sql +minSampleSizeConversion(baseline, mde, power, alpha) +``` + +Uses the formula described in [this article](https://towardsdatascience.com/required-sample-size-for-a-b-testing-6f6608dd330a). Assumes equal sizes of treatment and control groups. Returns the sample size required for one group (i.e. the sample size required for the whole experiment is twice the returned value). + +**Arguments** + +- `baseline` — Baseline conversion. [Float](../data-types/float.md). +- `mde` — Minimum detectable effect (MDE) as percentage points (e.g. for a baseline conversion 0.25 the MDE 0.03 means an expected change to 0.25 ± 0.03). [Float](../data-types/float.md). +- `power` — Required statistical power of a test (1 - probability of Type II error). [Float](../data-types/float.md). +- `alpha` — Required significance level of a test (probability of Type I error). [Float](../data-types/float.md). + +**Returned value** + +A named [Tuple](../data-types/tuple.md) with 3 elements: + +- `"minimum_sample_size"` — Required sample size. [Float64](../data-types/float.md). +- `"detect_range_lower"` — Lower bound of the range of values not detectable with the returned required sample size (i.e. all values less than or equal to `"detect_range_lower"` are detectable with the provided `alpha` and `power`). Calculated as `baseline - mde`. [Float64](../data-types/float.md). +- `"detect_range_upper"` — Upper bound of the range of values not detectable with the returned required sample size (i.e. all values greater than or equal to `"detect_range_upper"` are detectable with the provided `alpha` and `power`). Calculated as `baseline + mde`. [Float64](../data-types/float.md). + +**Example** + +The following query calculates the required sample size for an A/B test with baseline conversion of 25%, MDE of 3%, significance level of 5%, and the desired statistical power of 80%: + +``` sql +SELECT minSampleSizeConversion(0.25, 0.03, 0.80, 0.05) AS sample_size; +``` + +Result: + +``` text +┌─sample_size───────────────────┐ +│ (3396.077603219163,0.22,0.28) │ +└───────────────────────────────┘ +``` + +## minSampleSizeContinuous + +Calculates minimum required sample size for an A/B test comparing means of a continuous metric in two samples. + +**Syntax** + +``` sql +minSampleSizeContinous(baseline, sigma, mde, power, alpha) +``` + +Alias: `minSampleSizeContinous` + +Uses the formula described in [this article](https://towardsdatascience.com/required-sample-size-for-a-b-testing-6f6608dd330a). Assumes equal sizes of treatment and control groups. Returns the required sample size for one group (i.e. 
the sample size required for the whole experiment is twice the returned value). Also assumes equal variance of the test metric in treatment and control groups. + +**Arguments** + +- `baseline` — Baseline value of a metric. [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `sigma` — Baseline standard deviation of a metric. [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `mde` — Minimum detectable effect (MDE) as percentage of the baseline value (e.g. for a baseline value 112.25 the MDE 0.03 means an expected change to 112.25 ± 112.25*0.03). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `power` — Required statistical power of a test (1 - probability of Type II error). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `alpha` — Required significance level of a test (probability of Type I error). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). + +**Returned value** + +A named [Tuple](../data-types/tuple.md) with 3 elements: + +- `"minimum_sample_size"` — Required sample size. [Float64](../data-types/float.md). +- `"detect_range_lower"` — Lower bound of the range of values not detectable with the returned required sample size (i.e. all values less than or equal to `"detect_range_lower"` are detectable with the provided `alpha` and `power`). Calculated as `baseline * (1 - mde)`. [Float64](../data-types/float.md). +- `"detect_range_upper"` — Upper bound of the range of values not detectable with the returned required sample size (i.e. all values greater than or equal to `"detect_range_upper"` are detectable with the provided `alpha` and `power`). Calculated as `baseline * (1 + mde)`. [Float64](../data-types/float.md). + +**Example** + +The following query calculates the required sample size for an A/B test on a metric with baseline value of 112.25, standard deviation of 21.1, MDE of 3%, significance level of 5%, and the desired statistical power of 80%: + +``` sql +SELECT minSampleSizeContinous(112.25, 21.1, 0.03, 0.80, 0.05) AS sample_size; +``` + +Result: + +``` text +┌─sample_size───────────────────────────┐ +│ (616.2931945826209,108.8825,115.6175) │ +└───────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 1940993ce0b..20694211912 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -393,40 +393,6 @@ Reverses the sequence of bytes in a string. Reverses a sequence of Unicode code points in a string. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -## format - -Format the `pattern` string with the strings listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitly given monotonically increasing numbers). 
- -**Syntax** - -```sql -format(pattern, s0, s1, …) -``` - -**Example** - -``` sql -SELECT format('{1} {0} {1}', 'World', 'Hello') -``` - -```result -┌─format('{1} {0} {1}', 'World', 'Hello')─┐ -│ Hello World Hello │ -└─────────────────────────────────────────┘ -``` - -With implicit numbers: - -``` sql -SELECT format('{} {}', 'Hello', 'World') -``` - -```result -┌─format('{} {}', 'Hello', 'World')─┐ -│ Hello World │ -└───────────────────────────────────┘ -``` - ## concat Concatenates the given arguments. @@ -567,8 +533,8 @@ Result: ```result ┌─concatWithSeparator('a', '1', '2', '3', '4')─┐ -│ 1a2a3a4 │ -└───────────────────────────────────┘ +│ 1a2a3a4 │ +└──────────────────────────────────────────────┘ ``` ## concatWithSeparatorAssumeInjective @@ -577,26 +543,52 @@ Like `concatWithSeparator` but assumes that `concatWithSeparator(sep, expr1, exp A function is called injective if it returns for different arguments different results. In other words: different arguments never produce identical result. -## substring(s, offset, length) +## substring -Returns a substring with `length` many bytes, starting at the byte at index `offset`. Character indexing starts from 1. +Returns the substring of a string `s` which starts at the specified byte index `offset`. Byte counting starts from 1. If `offset` is 0, an empty string is returned. If `offset` is negative, the substring starts `pos` characters from the end of the string, rather than from the beginning. An optional argument `length` specifies the maximum number of bytes the returned substring may have. **Syntax** ```sql -substring(s, offset, length) +substring(s, offset[, length]) ``` Alias: - `substr` - `mid` +**Arguments** + +- `s` — The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md) +- `offset` — The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md). +- `length` — The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional. + +**Returned value** + +A substring of `s` with `length` many bytes, starting at index `offset`. + +Type: `String`. + +**Example** + +``` sql +SELECT 'database' AS db, substr(db, 5), substr(db, 5, 1) +``` + +Result: + +```result +┌─db───────┬─substring('database', 5)─┬─substring('database', 5, 1)─┐ +│ database │ base │ b │ +└──────────┴──────────────────────────┴─────────────────────────────┘ +``` + ## substringUTF8 Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -## substringIndex(s, delim, count) +## substringIndex Returns the substring of `s` before `count` occurrences of the delimiter `delim`, as in Spark or MySQL. @@ -627,7 +619,7 @@ Result: └──────────────────────────────────────────────┘ ``` -## substringIndexUTF8(s, delim, count) +## substringIndexUTF8 Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. @@ -1259,7 +1251,7 @@ This function also replaces numeric character references with Unicode characters **Syntax** ``` sql -decodeHTMComponent(x) +decodeHTMLComponent(x) ``` **Arguments** @@ -1276,7 +1268,7 @@ Type: [String](../../sql-reference/data-types/string.md). 
``` sql SELECT decodeHTMLComponent(''CH'); -SELECT decodeHMLComponent('I♥ClickHouse'); +SELECT decodeHTMLComponent('I♥ClickHouse'); ``` Result: diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 74d5d747193..c7bd16cad4a 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -132,6 +132,40 @@ For more information, see [RE2](https://github.com/google/re2/blob/master/re2/re regexpQuoteMeta(s) ``` +## format + +Format the `pattern` string with the values (strings, integers, etc.) listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitly given monotonically increasing numbers). + +**Syntax** + +```sql +format(pattern, s0, s1, …) +``` + +**Example** + +``` sql +SELECT format('{1} {0} {1}', 'World', 'Hello') +``` + +```result +┌─format('{1} {0} {1}', 'World', 'Hello')─┐ +│ Hello World Hello │ +└─────────────────────────────────────────┘ +``` + +With implicit numbers: + +``` sql +SELECT format('{} {}', 'Hello', 'World') +``` + +```result +┌─format('{} {}', 'Hello', 'World')─┐ +│ Hello World │ +└───────────────────────────────────┘ +``` + ## translate Replaces characters in the string `s` using a one-to-one character mapping defined by `from` and `to` strings. `from` and `to` must be constant ASCII strings of the same size. Non-ASCII characters in the original string are not modified. diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md index e183fdcdcd7..434432baa48 100644 --- a/docs/en/sql-reference/functions/time-series-functions.md +++ b/docs/en/sql-reference/functions/time-series-functions.md @@ -11,8 +11,7 @@ Below functions are used for time series analysis. ## seriesPeriodDetectFFT Finds the period of the given time series data using FFT -Detect Period in time series data using FFT. -FFT - Fast Fourier transform (https://en.wikipedia.org/wiki/Fast_Fourier_transform) +FFT - [Fast Fourier transform](https://en.wikipedia.org/wiki/Fast_Fourier_transform) **Syntax** @@ -27,6 +26,7 @@ seriesPeriodDetectFFT(series); **Returned value** - A real value equal to the period of time series +- Returns NAN when number of data points are less than four. Type: [Float64](../../sql-reference/data-types/float.md). 
@@ -45,3 +45,15 @@ Result: │ 3 │ └────────────────────────┘ ``` + +``` sql +SELECT seriesPeriodDetectFFT(arrayMap(x -> abs((x % 6) - 3), range(1000))) AS print_0; +``` + +Result: + +``` text +┌─print_0─┐ +│ 6 │ +└─────────┘ +``` diff --git a/docs/en/sql-reference/statements/alter/apply-deleted-mask.md b/docs/en/sql-reference/statements/alter/apply-deleted-mask.md new file mode 100644 index 00000000000..7a11d66e739 --- /dev/null +++ b/docs/en/sql-reference/statements/alter/apply-deleted-mask.md @@ -0,0 +1,22 @@ +--- +slug: /en/sql-reference/statements/alter/apply-deleted-mask +sidebar_position: 46 +sidebar_label: APPLY DELETED MASK +--- + +# Apply mask of deleted rows + +``` sql +ALTER TABLE [db].name [ON CLUSTER cluster] APPLY DELETED MASK [IN PARTITION partition_id] +``` + +The command applies the mask created by [lightweight delete](/docs/en/sql-reference/statements/delete) and forcefully removes rows marked as deleted from disk. This command is a heavyweight mutation and is semantically equivalent to the query ```ALTER TABLE [db].name DELETE WHERE _row_exists = 0```. + +:::note +It only works for tables in the [`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). +::: + +**See also** + +- [Lightweight deletes](/docs/en/sql-reference/statements/delete) +- [Heavyweight deletes](/docs/en/sql-reference/statements/alter/delete.md) diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index d28542e0a43..dc6668c7983 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -17,8 +17,9 @@ Most `ALTER TABLE` queries modify table settings or data: - [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md) - [TTL](/docs/en/sql-reference/statements/alter/ttl.md) - [STATISTIC](/docs/en/sql-reference/statements/alter/statistic.md) +- [APPLY DELETED MASK](/docs/en/sql-reference/statements/alter/apply-deleted-mask.md) -:::note +:::note Most `ALTER TABLE` queries are supported only for [\*MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](/docs/en/engines/table-engines/special/merge.md) and [Distributed](/docs/en/engines/table-engines/special/distributed.md). ::: @@ -59,7 +60,7 @@ For all `ALTER` queries, you can use the [alter_sync](/docs/en/operations/settin You can specify how long (in seconds) to wait for inactive replicas to execute all `ALTER` queries with the [replication_wait_for_inactive_replica_timeout](/docs/en/operations/settings/settings.md/#replication-wait-for-inactive-replica-timeout) setting. -:::note +:::note For all `ALTER` queries, if `alter_sync = 2` and some replicas are not active for more than the time, specified in the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. ::: diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md index 5c5bf0355f6..517e64e3e5b 100644 --- a/docs/en/sql-reference/statements/alter/view.md +++ b/docs/en/sql-reference/statements/alter/view.md @@ -6,28 +6,28 @@ sidebar_label: VIEW # ALTER TABLE … MODIFY QUERY Statement -You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process.
+You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process. -The `allow_experimental_alter_materialized_view_structure` setting must be enabled. +The `allow_experimental_alter_materialized_view_structure` setting must be enabled. This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underling storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause. **Example with TO table** ```sql -CREATE TABLE events (ts DateTime, event_type String) +CREATE TABLE events (ts DateTime, event_type String) ENGINE = MergeTree ORDER BY (event_type, ts); -CREATE TABLE events_by_day (ts DateTime, event_type String, events_cnt UInt64) +CREATE TABLE events_by_day (ts DateTime, event_type String, events_cnt UInt64) ENGINE = SummingMergeTree ORDER BY (event_type, ts); -CREATE MATERIALIZED VIEW mv TO events_by_day AS +CREATE MATERIALIZED VIEW mv TO events_by_day AS SELECT toStartOfDay(ts) ts, event_type, count() events_cnt FROM events -GROUP BY ts, event_type; +GROUP BY ts, event_type; -INSERT INTO events -SELECT Date '2020-01-01' + interval number * 900 second, +INSERT INTO events +SELECT Date '2020-01-01' + interval number * 900 second, ['imp', 'click'][number%2+1] FROM numbers(100); @@ -43,23 +43,23 @@ ORDER BY ts, event_type; │ 2020-01-02 00:00:00 │ imp │ 2 │ └─────────────────────┴────────────┴─────────────────┘ --- Let's add the new measurment `cost` +-- Let's add the new measurment `cost` -- and the new dimension `browser`. -ALTER TABLE events +ALTER TABLE events ADD COLUMN browser String, ADD COLUMN cost Float64; -- Column do not have to match in a materialized view and TO -- (destination table), so the next alter does not break insertion. -ALTER TABLE events_by_day +ALTER TABLE events_by_day ADD COLUMN cost Float64, ADD COLUMN browser String after event_type, MODIFY ORDER BY (event_type, ts, browser); -INSERT INTO events -SELECT Date '2020-01-02' + interval number * 900 second, +INSERT INTO events +SELECT Date '2020-01-02' + interval number * 900 second, ['imp', 'click'][number%2+1], ['firefox', 'safary', 'chrome'][number%3+1], 10/(number+1)%33 @@ -82,16 +82,16 @@ ORDER BY ts, event_type; └─────────────────────┴────────────┴─────────┴────────────┴──────┘ SET allow_experimental_alter_materialized_view_structure=1; - -ALTER TABLE mv MODIFY QUERY + +ALTER TABLE mv MODIFY QUERY SELECT toStartOfDay(ts) ts, event_type, browser, count() events_cnt, sum(cost) cost FROM events GROUP BY ts, event_type, browser; -INSERT INTO events -SELECT Date '2020-01-03' + interval number * 900 second, +INSERT INTO events +SELECT Date '2020-01-03' + interval number * 900 second, ['imp', 'click'][number%2+1], ['firefox', 'safary', 'chrome'][number%3+1], 10/(number+1)%33 @@ -138,7 +138,7 @@ PRIMARY KEY (event_type, ts) ORDER BY (event_type, ts, browser) SETTINGS index_granularity = 8192 --- !!! The columns' definition is unchanged but it does not matter, we are not quering +-- !!! The columns' definition is unchanged but it does not matter, we are not quering -- MATERIALIZED VIEW, we are quering TO (storage) table. -- SELECT section is updated. 
@@ -169,7 +169,7 @@ The application is very limited because you can only change the `SELECT` section ```sql CREATE TABLE src_table (`a` UInt32) ENGINE = MergeTree ORDER BY a; -CREATE MATERIALIZED VIEW mv (`a` UInt32) ENGINE = MergeTree ORDER BY a AS SELECT a FROM src_table; +CREATE MATERIALIZED VIEW mv (`a` UInt32) ENGINE = MergeTree ORDER BY a AS SELECT a FROM src_table; INSERT INTO src_table (a) VALUES (1), (2); SELECT * FROM mv; ``` @@ -199,3 +199,7 @@ SELECT * FROM mv; ## ALTER LIVE VIEW Statement `ALTER LIVE VIEW ... REFRESH` statement refreshes a [Live view](../create/view.md#live-view). See [Force Live View Refresh](../create/view.md#live-view-alter-refresh). + +## ALTER TABLE … MODIFY REFRESH Statement + +`ALTER TABLE ... MODIFY REFRESH` statement changes refresh parameters of a [Refreshable Materialized View](../create/view.md#refreshable-materialized-view). See [Changing Refresh Parameters](../create/view.md#changing-refresh-parameters). diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 2a8d6788889..f6158acd9a4 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -37,6 +37,7 @@ SELECT a, b, c FROM (SELECT ...) ``` ## Parameterized View + Parametrized views are similar to normal views, but can be created with parameters which are not resolved immediately. These views can be used with table functions, which specify the name of the view as function name and the parameter values as its arguments. ``` sql @@ -66,7 +67,7 @@ When creating a materialized view with `TO [db].[table]`, you can't also use `PO A materialized view is implemented as follows: when inserting data to the table specified in `SELECT`, part of the inserted data is converted by this `SELECT` query, and the result is inserted in the view. -:::note +:::note Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views. Materialized views in ClickHouse are implemented more like insert triggers. If there’s some aggregation in the view query, it’s applied only to the batch of freshly inserted data. Any changes to existing data of source table (like update, delete, drop partition, etc.) does not change the materialized view. @@ -90,156 +91,122 @@ Views look the same as normal tables. For example, they are listed in the result To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). Although `DROP TABLE` works for VIEWs as well. -## Live View [Experimental] +## Live View [Deprecated] -:::note -This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. +This feature is deprecated and will be removed in the future. 
+ +For your convenience, the old documentation is located [here](https://pastila.nl/?00f32652/fdf07272a7b54bda7e13b919264e449f.md) + +## Refreshable Materialized View {#refreshable-materialized-view} + +```sql +CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name +REFRESH EVERY|AFTER interval [OFFSET interval] +RANDOMIZE FOR interval +DEPENDS ON [db.]name [, [db.]name [, ...]] +[TO[db.]name] [(columns)] [ENGINE = engine] [EMPTY] +AS SELECT ... +``` +where `interval` is a sequence of simple intervals: +```sql +number SECOND|MINUTE|HOUR|DAY|WEEK|MONTH|YEAR +``` + +Periodically runs the corresponding query and stores its result in a table, atomically replacing the table's previous contents. + +Differences from regular non-refreshable materialized views: + * No insert trigger. I.e. when new data is inserted into the table specified in SELECT, it's *not* automatically pushed to the refreshable materialized view. The periodic refresh runs the entire query and replaces the entire table. + * No restrictions on the SELECT query. Table functions (e.g. `url()`), views, UNION, JOIN, are all allowed. + +:::note +Refreshable materialized views are a work in progress. Setting `allow_experimental_refreshable_materialized_view = 1` is required for creating one. Current limitations: + * not compatible with Replicated database or table engines, + * require [Atomic database engine](../../../engines/database-engines/atomic.md), + * no retries for failed refresh - we just skip to the next scheduled refresh time, + * no limit on number of concurrent refreshes. ::: +### Refresh Schedule + +Example refresh schedules: ```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... +REFRESH EVERY 1 DAY -- every day, at midnight (UTC) +REFRESH EVERY 1 MONTH -- on 1st day of every month, at midnight +REFRESH EVERY 1 MONTH OFFSET 5 DAY 2 HOUR -- on 6th day of every month, at 2:00 am +REFRESH EVERY 2 WEEK OFFSET 5 DAY 15 HOUR 10 MINUTE -- every other Saturday, at 3:10 pm +REFRESH EVERY 30 MINUTE -- at 00:00, 00:30, 01:00, 01:30, etc +REFRESH AFTER 30 MINUTE -- 30 minutes after the previous refresh completes, no alignment with time of day +-- REFRESH AFTER 1 HOUR OFFSET 1 MINUTE -- syntax errror, OFFSET is not allowed with AFTER ``` -Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. +`RANDOMIZE FOR` randomly adjusts the time of each refresh, e.g.: +```sql +REFRESH EVERY 1 DAY OFFSET 2 HOUR RANDOMIZE FOR 1 HOUR -- every day at random time between 01:30 and 02:30 +``` -Live views are triggered by insert into the innermost table specified in the query. +At most one refresh may be running at a time, for a given view. E.g. if a view with `REFRESH EVERY 1 MINUTE` takes 2 minutes to refresh, it'll just be refreshing every 2 minutes. If it then becomes faster and starts refreshing in 10 seconds, it'll go back to refreshing every minute. (In particular, it won't refresh every 10 seconds to catch up with a backlog of missed refreshes - there's no such backlog.) -Live views work similarly to how a query in a distributed table works. 
But instead of combining partial results from different servers they combine partial result from current data with partial result from the new data. When a live view query includes a subquery then the cached partial result is only stored for the innermost subquery. +Additionally, a refresh is started immediately after the materialized view is created, unless `EMPTY` is specified in the `CREATE` query. If `EMPTY` is specified, the first refresh happens according to schedule. -:::info -- [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table. -- Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md), [system table](../../../operations/system-tables/index.md), a [normal view](#normal), or a [materialized view](#materialized) will not trigger a live view. -- Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result or aggregations where the state of the aggregation must be preserved. -- Does not work with replicated or distributed tables where inserts are performed on different nodes. -- Can't be triggered by multiple tables. +### Dependencies {#refresh-dependencies} -See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view that in some cases can be used as a workaround. +`DEPENDS ON` synchronizes refreshes of different tables. By way of example, suppose there's a chain of two refreshable materialized views: +```sql +CREATE MATERIALIZED VIEW source REFRESH EVERY 1 DAY AS SELECT * FROM url(...) +CREATE MATERIALIZED VIEW destination REFRESH EVERY 1 DAY AS SELECT ... FROM source +``` +Without `DEPENDS ON`, both views will start a refresh at midnight, and `destination` typically will see yesterday's data in `source`. If we add dependency: +``` +CREATE MATERIALIZED VIEW destination REFRESH EVERY 1 DAY DEPENDS ON source AS SELECT ... FROM source +``` +then `destination`'s refresh will start only after `source`'s refresh finished for that day, so `destination` will be based on fresh data. + +Alternatively, the same result can be achieved with: +``` +CREATE MATERIALIZED VIEW destination REFRESH AFTER 1 HOUR DEPENDS ON source AS SELECT ... FROM source +``` +where `1 HOUR` can be any duration less than `source`'s refresh period. The dependent table won't be refreshed more frequently than any of its dependencies. This is a valid way to set up a chain of refreshable views without specifying the real refresh period more than once. + +A few more examples: + * `REFRESH EVERY 1 DAY OFFSET 10 MINUTE` (`destination`) depends on `REFRESH EVERY 1 DAY` (`source`)
+ If `source` refresh takes more than 10 minutes, `destination` will wait for it. + * `REFRESH EVERY 1 DAY OFFSET 1 HOUR` depends on `REFRESH EVERY 1 DAY OFFSET 23 HOUR`
+ Similar to the above, even though the corresponding refreshes happen on different calendar days. + `destination`'s refresh on day X+1 will wait for `source`'s refresh on day X (if it takes more than 2 hours). + * `REFRESH EVERY 2 HOUR` depends on `REFRESH EVERY 1 HOUR`
+ The 2 HOUR refresh happens after the 1 HOUR refresh for every other hour, e.g. after the midnight + refresh, then after the 2am refresh, etc. + * `REFRESH EVERY 1 MINUTE` depends on `REFRESH EVERY 2 HOUR`
+ `REFRESH AFTER 1 MINUTE` depends on `REFRESH EVERY 2 HOUR`
+ `REFRESH AFTER 1 MINUTE` depends on `REFRESH AFTER 2 HOUR`
+ `destination` is refreshed once after every `source` refresh, i.e. every 2 hours. The `1 MINUTE` is effectively ignored. + * `REFRESH AFTER 1 HOUR` depends on `REFRESH AFTER 1 HOUR`
+ Currently this is not recommended. + +:::note +`DEPENDS ON` only works between refreshable materialized views. Listing a regular table in the `DEPENDS ON` list will prevent the view from ever refreshing (dependencies can be removed with `ALTER`, see below). ::: -### Monitoring Live View Changes +### Changing Refresh Parameters {#changing-refresh-parameters} -You can monitor changes in the `LIVE VIEW` query result using [WATCH](../../../sql-reference/statements/watch.md) query. - -```sql -WATCH [db.]live_view +To change refresh parameters: +``` +ALTER TABLE [db.]name MODIFY REFRESH EVERY|AFTER ... [RANDOMIZE FOR ...] [DEPENDS ON ...] ``` -**Example:** +:::note +This replaces refresh schedule *and* dependencies. If the table had a `DEPENDS ON`, doing a `MODIFY REFRESH` without `DEPENDS ON` will remove the dependencies. +::: -```sql -CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x; -CREATE LIVE VIEW lv AS SELECT sum(x) FROM mt; -``` -Watch a live view while doing a parallel insert into the source table. +### Other operations -```sql -WATCH lv; -``` +The status of all refreshable materialized views is available in table [`system.view_refreshes`](../../../operations/system-tables/view_refreshes.md). In particular, it contains refresh progress (if running), last and next refresh time, exception message if a refresh failed. -```bash -┌─sum(x)─┬─_version─┐ -│ 1 │ 1 │ -└────────┴──────────┘ -┌─sum(x)─┬─_version─┐ -│ 3 │ 2 │ -└────────┴──────────┘ -┌─sum(x)─┬─_version─┐ -│ 6 │ 3 │ -└────────┴──────────┘ -``` - -```sql -INSERT INTO mt VALUES (1); -INSERT INTO mt VALUES (2); -INSERT INTO mt VALUES (3); -``` - -Or add [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause to just get change events. - -```sql -WATCH [db.]live_view EVENTS; -``` - -**Example:** - -```sql -WATCH lv EVENTS; -``` - -```bash -┌─version─┐ -│ 1 │ -└─────────┘ -┌─version─┐ -│ 2 │ -└─────────┘ -┌─version─┐ -│ 3 │ -└─────────┘ -``` - -You can execute [SELECT](../../../sql-reference/statements/select/index.md) query on a live view in the same way as for any regular view or a table. If the query result is cached it will return the result immediately without running the stored query on the underlying tables. - -```sql -SELECT * FROM [db.]live_view WHERE ... -``` - -### Force Live View Refresh - -You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement. - -### WITH REFRESH Clause - -When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger. - -```sql -CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ... -``` - -If the refresh value is not specified then the value specified by the [periodic_live_view_refresh](../../../operations/settings/settings.md#periodic-live-view-refresh) setting is used. - -**Example:** - -```sql -CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now(); -WATCH lv -``` - -```bash -┌───────────────now()─┬─_version─┐ -│ 2021-02-21 08:47:05 │ 1 │ -└─────────────────────┴──────────┘ -┌───────────────now()─┬─_version─┐ -│ 2021-02-21 08:47:10 │ 2 │ -└─────────────────────┴──────────┘ -┌───────────────now()─┬─_version─┐ -│ 2021-02-21 08:47:15 │ 3 │ -└─────────────────────┴──────────┘ -``` - -```sql -WATCH lv -``` - -``` -Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table default.lv does not exist.. 
-``` - -### Live View Usage - -Most common uses of live view tables include: - -- Providing push notifications for query result changes to avoid polling. -- Caching results of most frequent queries to provide immediate query results. -- Watching for table changes and triggering a follow-up select queries. -- Watching metrics from system tables using periodic refresh. - -**See Also** -- [ALTER LIVE VIEW](../alter/view.md#alter-live-view) +To manually stop, start, trigger, or cancel refreshes use [`SYSTEM STOP|START|REFRESH|CANCEL VIEW`](../system.md#refreshable-materialized-views). ## Window View [Experimental] -:::info +:::info This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of window views and `WATCH` query using [allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view) setting. Input the command `set allow_experimental_window_view = 1`. ::: diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index e0cc98c2351..f9d93305071 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -11,7 +11,7 @@ Inserts data into a table. **Syntax** ``` sql -INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] [SETTINGS ...] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). @@ -126,7 +126,7 @@ To insert a default value instead of `NULL` into a column with not nullable data **Syntax** ``` sql -INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] [SETTINGS ...] [FORMAT format_name] ``` Use the syntax above to insert data from a file, or files, stored on the **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause. diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 07b5a196096..b5fc0a23745 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -17,7 +17,7 @@ This query tries to initialize an unscheduled merge of data parts for tables. No OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] ``` -The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported. 
+The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family (including [materialized views](../../sql-reference/statements/create/view.md#materialized-view)) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported. When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all replicas (if the [alter_sync](../../operations/settings/settings.md#alter-sync) setting is set to `2`) or on current replica (if the [alter_sync](../../operations/settings/settings.md#alter-sync) setting is set to `1`). diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index a4f449ad321..06742ff74e2 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -34,7 +34,7 @@ Queries that use `FINAL` are executed slightly slower than similar queries that - Data is merged during query execution. - Queries with `FINAL` read primary key columns in addition to the columns specified in the query. -**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). +`FINAL` requires additional compute and memory resources, as the processing that normally would occur at merge time must occur in memory at the time of the query. However, using FINAL is sometimes necessary in order to produce accurate results, and is less expensive than running `OPTIMIZE` to force a merge. It is also sometimes possible to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). If you need to use FINAL in your queries in order to get the required results, then it is okay to do so but be aware of the additional processing required. `FINAL` can be applied automatically using [FINAL](../../../operations/settings/settings.md#final) setting to all tables in a query using a session or a user profile. diff --git a/docs/en/sql-reference/statements/select/into-outfile.md b/docs/en/sql-reference/statements/select/into-outfile.md index 352af16042a..5b7196f13e3 100644 --- a/docs/en/sql-reference/statements/select/into-outfile.md +++ b/docs/en/sql-reference/statements/select/into-outfile.md @@ -12,7 +12,7 @@ Compressed files are supported. Compression type is detected by the extension of **Syntax** ```sql -SELECT INTO OUTFILE file_name [AND STDOUT] [APPEND] [COMPRESSION type [LEVEL level]] +SELECT INTO OUTFILE file_name [AND STDOUT] [APPEND | TRUNCATE] [COMPRESSION type [LEVEL level]] ``` `file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`. @@ -26,6 +26,7 @@ SELECT INTO OUTFILE file_name [AND STDOUT] [APPEND] [COMPRESSION typ - The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it. - If `AND STDOUT` is mentioned in the query then the output that is written to the file is also displayed on standard output. 
If used with compression, the plaintext is displayed on standard output. - If `APPEND` is mentioned in the query then the output is appended to an existing file. If compression is used, append cannot be used. +- When writing to a file that already exists, `APPEND` or `TRUNCATE` must be used. **Example** diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 281a1d0436c..0529be06b5d 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -43,22 +43,23 @@ Additional join types available in ClickHouse: - `LEFT ANTI JOIN` and `RIGHT ANTI JOIN`, a blacklist on “join keys”, without producing a cartesian product. - `LEFT ANY JOIN`, `RIGHT ANY JOIN` and `INNER ANY JOIN`, partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types. - `ASOF JOIN` and `LEFT ASOF JOIN`, joining sequences with a non-exact match. `ASOF JOIN` usage is described below. +- `PASTE JOIN`, performs a horizontal concatenation of two tables. :::note -When [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). +When [join_algorithm](../../../operations/settings/settings.md#join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). ::: ## Settings -The default join type can be overridden using [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting. +The default join type can be overridden using [join_default_strictness](../../../operations/settings/settings.md#join_default_strictness) setting. The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting. **See also** -- [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) -- [join_any_take_last_row](../../../operations/settings/settings.md#settings-join_any_take_last_row) +- [join_algorithm](../../../operations/settings/settings.md#join_algorithm) +- [join_any_take_last_row](../../../operations/settings/settings.md#join_any_take_last_row) - [join_use_nulls](../../../operations/settings/settings.md#join_use_nulls) - [partial_merge_join_optimizations](../../../operations/settings/settings.md#partial_merge_join_optimizations) - [partial_merge_join_rows_in_right_blocks](../../../operations/settings/settings.md#partial_merge_join_rows_in_right_blocks) @@ -269,6 +270,33 @@ For example, consider the following tables: `ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. ::: +## PASTE JOIN Usage + +The result of `PASTE JOIN` is a table that contains all columns from left subquery followed by all columns from the right subquery. +The rows are matched based on their positions in the original tables (the order of rows should be defined). +If the subqueries return a different number of rows, extra rows will be cut. 
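For instance, if the left-hand subquery returns three rows and the right-hand one returns two, only two rows are produced and the extra left-hand row is dropped. A minimal sketch of that truncation behaviour (illustrative only, not part of the reference example below):

```sql
SELECT *
FROM
(
    SELECT number AS a
    FROM numbers(3)
    ORDER BY a
) AS t1
PASTE JOIN
(
    SELECT number AS b
    FROM numbers(2)
    ORDER BY b
) AS t2
-- expected to return two rows, (0, 0) and (1, 1); the left-hand row with a = 2 is cut
```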
+ +Example: +```SQL +SELECT * +FROM +( + SELECT number AS a + FROM numbers(2) +) AS t1 +PASTE JOIN +( + SELECT number AS a + FROM numbers(2) + ORDER BY a DESC +) AS t2 + +┌─a─┬─t2.a─┐ +│ 0 │ 1 │ +│ 1 │ 0 │ +└───┴──────┘ +``` + ## Distributed JOIN There are two ways to execute join involving distributed tables: @@ -352,7 +380,7 @@ If you need a `JOIN` for joining with dimension tables (these are relatively sma ### Memory Limitations -By default, ClickHouse uses the [hash join](https://en.wikipedia.org/wiki/Hash_join) algorithm. ClickHouse takes the right_table and creates a hash table for it in RAM. If `join_algorithm = 'auto'` is enabled, then after some threshold of memory consumption, ClickHouse falls back to [merge](https://en.wikipedia.org/wiki/Sort-merge_join) join algorithm. For `JOIN` algorithms description see the [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) setting. +By default, ClickHouse uses the [hash join](https://en.wikipedia.org/wiki/Hash_join) algorithm. ClickHouse takes the right_table and creates a hash table for it in RAM. If `join_algorithm = 'auto'` is enabled, then after some threshold of memory consumption, ClickHouse falls back to [merge](https://en.wikipedia.org/wiki/Sort-merge_join) join algorithm. For `JOIN` algorithms description see the [join_algorithm](../../../operations/settings/settings.md#join_algorithm) setting. If you need to restrict `JOIN` operation memory consumption use the following settings: diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 53bdc9041a1..d6432a7b4f8 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -5,12 +5,22 @@ sidebar_label: ORDER BY # ORDER BY Clause -The `ORDER BY` clause contains a list of expressions, which can each be attributed with `DESC` (descending) or `ASC` (ascending) modifier which determine the sorting direction. If the direction is not specified, `ASC` is assumed, so it’s usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase`. Sorting is case-sensitive. +The `ORDER BY` clause contains -If you want to sort by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). +- a list of expressions, e.g. `ORDER BY visits, search_phrase`, +- a list of numbers referring to columns in the `SELECT` clause, e.g. `ORDER BY 2, 1`, or +- `ALL` which means all columns of the `SELECT` clause, e.g. `ORDER BY ALL`. -Rows that have identical values for the list of sorting expressions are output in an arbitrary order, which can also be non-deterministic (different each time). -If the ORDER BY clause is omitted, the order of the rows is also undefined, and may be non-deterministic as well. +To disable sorting by column numbers, set setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments) = 0. +To disable sorting by `ALL`, set setting [enable_order_by_all](../../../operations/settings/settings.md#enable-order-by-all) = 0. + +The `ORDER BY` clause can be attributed by a `DESC` (descending) or `ASC` (ascending) modifier which determines the sorting direction. +Unless an explicit sort order is specified, `ASC` is used by default. +The sorting direction applies to a single expression, not to the entire list, e.g. 
`ORDER BY Visits DESC, SearchPhrase`. +Also, sorting is performed case-sensitively. + +Rows with identical values for all sort expressions are returned in an arbitrary and non-deterministic order. +If the `ORDER BY` clause is omitted in a `SELECT` statement, the row order is also arbitrary and non-deterministic. ## Sorting of Special Values @@ -265,8 +275,9 @@ Consider disabling `optimize_read_in_order` manually, when running queries that Optimization is supported in the following table engines: -- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) -- [Merge](../../../engines/table-engines/special/merge.md), [Buffer](../../../engines/table-engines/special/buffer.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) table engines over `MergeTree`-engine tables +- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) (including [materialized views](../../../sql-reference/statements/create/view.md#materialized-view)), +- [Merge](../../../engines/table-engines/special/merge.md), +- [Buffer](../../../engines/table-engines/special/buffer.md) In `MaterializedView`-engine tables the optimization works with views like `SELECT ... FROM merge_tree_table ORDER BY pk`. But it is not supported in the queries like `SELECT ... FROM view ORDER BY pk` if the view query does not have the `ORDER BY` clause. diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 177a8283f38..0fdbbeac235 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -150,7 +150,7 @@ SYSTEM RELOAD CONFIG [ON CLUSTER cluster_name] ## RELOAD USERS -Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage. +Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage. ```sql SYSTEM RELOAD USERS [ON CLUSTER cluster_name] @@ -354,7 +354,7 @@ After running this statement the `[db.]replicated_merge_tree_family_table_name` ### SYNC DATABASE REPLICA -Waits until the specified [replicated database](https://clickhouse.com/docs/en/engines/database-engines/replicated) applies all schema changes from the DDL queue of that database. +Waits until the specified [replicated database](https://clickhouse.com/docs/en/engines/database-engines/replicated) applies all schema changes from the DDL queue of that database. **Syntax** ```sql @@ -449,14 +449,14 @@ SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name] ``` -### SYSTEM STOP LISTEN +## SYSTEM STOP LISTEN -Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol. +Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol. However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect.
```sql -SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] ``` - If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped. @@ -464,12 +464,56 @@ SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QU - If `QUERIES DEFAULT [EXCEPT .. [,..]]` modifier is specified, all default protocols are stopped, unless specified with `EXCEPT` clause. - If `QUERIES CUSTOM [EXCEPT .. [,..]]` modifier is specified, all custom protocols are stopped, unless specified with `EXCEPT` clause. -### SYSTEM START LISTEN +## SYSTEM START LISTEN Allows new connections to be established on the specified protocols. However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect. ```sql -SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +``` + +## Managing Refreshable Materialized Views {#refreshable-materialized-views} + +Commands to control background tasks performed by [Refreshable Materialized Views](../../sql-reference/statements/create/view.md#refreshable-materialized-view) + +Keep an eye on [`system.view_refreshes`](../../operations/system-tables/view_refreshes.md) while using them. + +### SYSTEM REFRESH VIEW + +Trigger an immediate out-of-schedule refresh of a given view. + +```sql +SYSTEM REFRESH VIEW [db.]name +``` + +### SYSTEM STOP VIEW, SYSTEM STOP VIEWS + +Disable periodic refreshing of the given view or all refreshable views. If a refresh is in progress, cancel it too. + +```sql +SYSTEM STOP VIEW [db.]name +``` +```sql +SYSTEM STOP VIEWS +``` + +### SYSTEM START VIEW, SYSTEM START VIEWS + +Enable periodic refreshing for the given view or all refreshable views. No immediate refresh is triggered. + +```sql +SYSTEM START VIEW [db.]name +``` +```sql +SYSTEM START VIEWS +``` + +### SYSTEM CANCEL VIEW + +If there's a refresh in progress for the given view, interrupt and cancel it. Otherwise do nothing. + +```sql +SYSTEM CANCEL VIEW [db.]name ``` diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index f5651c2dcb6..6dcb3e75e48 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -16,7 +16,7 @@ INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#input_format_values_interpret_expressions) setting. 
When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#expressions). -Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. +Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. It allows for avoiding issues with large `INSERT` queries. When using the `Values` format in an `INSERT` query, it may seem that data is parsed the same as expressions in a `SELECT` query, but this is not true. The `Values` format is much more limited. diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md index a083c6b89a6..ad92ab39183 100644 --- a/docs/en/sql-reference/table-functions/cluster.md +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -55,5 +55,5 @@ Connection settings like `host`, `port`, `user`, `password`, `compression`, `sec **See Also** -- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) -- [load_balancing](../../operations/settings/settings.md#settings-load_balancing) +- [skip_unavailable_shards](../../operations/settings/settings.md#skip_unavailable_shards) +- [load_balancing](../../operations/settings/settings.md#load_balancing) diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index ad1feb87c60..3a63811add6 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -1,4 +1,4 @@ - -- +--- slug: /en/sql-reference/table-functions/file sidebar_position: 60 sidebar_label: file @@ -128,17 +128,17 @@ Reading data from `table.csv`, located in `archive1.zip` or/and `archive2.zip`: SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv'); ``` -## Globbing {#globs_in_path} +## Globs in path {#globs_in_path} Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix. - `*` — Represents arbitrarily many characters except `/` but including the empty string. - `?` — Represents an arbitrary single character. -- `{some_string,another_string,yet_another_one}` — Represents any of alternative strings `'some_string', 'another_string', 'yet_another_one'`. The strings may contain `/`. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. The strings can contain the `/` symbol. - `{N..M}` — Represents any number `>= N` and `<= M`. - `**` - Represents all files inside a folder recursively. -Constructions with `{}` are similar to the [remote](remote.md) table function. +Constructions with `{}` are similar to the [remote](remote.md) and [hdfs](hdfs.md) table functions. **Example** @@ -199,11 +199,11 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3 ## Settings {#settings} -- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. 
+- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. - [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. - [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local. +- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local. **See Also** diff --git a/docs/en/sql-reference/table-functions/fileCluster.md b/docs/en/sql-reference/table-functions/fileCluster.md index 22ca132f136..2646250311c 100644 --- a/docs/en/sql-reference/table-functions/fileCluster.md +++ b/docs/en/sql-reference/table-functions/fileCluster.md @@ -45,7 +45,7 @@ $ cat /var/lib/clickhouse/user_files/test1.csv 1,"file1" 11,"file11" -$ cat /var/lib/clickhouse/user_files/test1.csv +$ cat /var/lib/clickhouse/user_files/test2.csv 2,"file2" 22,"file22" ``` diff --git a/docs/en/sql-reference/table-functions/fuzzJSON.md b/docs/en/sql-reference/table-functions/fuzzJSON.md index 74ccb0bcb8a..a64f35691f6 100644 --- a/docs/en/sql-reference/table-functions/fuzzJSON.md +++ b/docs/en/sql-reference/table-functions/fuzzJSON.md @@ -19,6 +19,7 @@ fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] }) - `json_str` (String) - The source string representing structured data in JSON format. - `random_seed` (UInt64) - Manual random seed for producing stable results. - `reuse_output` (boolean) - Reuse the output from a fuzzing process as input for the next fuzzer. + - `malform_output` (boolean) - Generate a string that cannot be parsed as a JSON object. - `max_output_length` (UInt64) - Maximum allowable length of the generated or perturbed JSON string. - `probability` (Float64) - The probability to fuzz a JSON field (a key-value pair). Must be within [0, 1] range. - `max_nesting_level` (UInt64) - The maximum allowed depth of nested structures within the JSON data. 
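Several of these options can be combined in a single call. A minimal sketch (assuming the `json_nc` named collection used in the examples below; the fuzzed output varies between runs and seeds):

```sql
-- illustrative only: combine a fixed seed with output-length and fuzz-probability limits
SELECT *
FROM fuzzJSON(json_nc, json_str='{"name" : "FuzzJSON"}', random_seed=42, max_output_length=128, probability=0.3)
LIMIT 3;
```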
@@ -84,3 +85,13 @@ SELECT * FROM fuzzJSON('{"id":1}', 1234) LIMIT 3; {"BRjE":16137826149911306846} {"XjKE":15076727133550123563} ``` + +``` sql +SELECT * FROM fuzzJSON(json_nc, json_str='{"name" : "FuzzJSON"}', random_seed=1337, malform_output=true) LIMIT 3; +``` + +``` text +U"name":"FuzzJSON*"SpByjZKtr2VAyHCO"falseh +{"name"keFuzzJSON, "g6vVO7TCIk":jTt^ +{"DBhz":YFuzzJSON5} +``` diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 31780e30e8e..92f904b8841 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -41,14 +41,14 @@ LIMIT 2 ## Globs in path {#globs_in_path} -Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix). +Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix. -- `*` — Substitutes any number of any characters except `/` including empty string. -- `?` — Substitutes any single character. +- `*` — Represents arbitrarily many characters except `/` but including the empty string. +- `?` — Represents an arbitrary single character. - `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. The strings can contain the `/` symbol. -- `{N..M}` — Substitutes any number in range from N to M including both borders. +- `{N..M}` — Represents any number `>= N` and `<= M`. -Constructions with `{}` are similar to the [remote](../../sql-reference/table-functions/remote.md)) table function. +Constructions with `{}` are similar to the [remote](remote.md) and [file](file.md) table functions. **Example** @@ -100,7 +100,7 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin ## Storage Settings {#storage-settings} -- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. +- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. - [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default. - [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs. diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index 3ca177050d3..228f4a4c7e1 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -165,5 +165,5 @@ The following pattern types are supported. - `{0n..0m}` - A range of numbers with leading zeroes. This pattern preserves leading zeroes in indices. For instance, `example{01..03}-1` generates `example01-1`, `example02-1` and `example03-1`. - `{a|b}` - Any number of variants separated by a `|`. The pattern specifies replicas. For instance, `example01-{1|2}` generates replicas `example01-1` and `example01-2`. -The query will be sent to the first healthy replica. 
However, for `remote` the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md#settings-load_balancing) setting. +The query will be sent to the first healthy replica. However, for `remote` the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md#load_balancing) setting. The number of generated addresses is limited by [table_function_remote_max_addresses](../../operations/settings/settings.md#table_function_remote_max_addresses) setting. diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index dc11259c626..8065f066666 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -16,7 +16,7 @@ When using the `s3 table function` with [`INSERT INTO...SELECT`](../../sql-refer **Syntax** ``` sql -s3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +s3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key [,session_token]] [,format] [,structure] [,compression]) ``` :::tip GCS @@ -38,6 +38,8 @@ For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_ ::: - `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed. +- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `session_token` - Session token to use with the given keys. Optional when passing keys. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. @@ -236,7 +238,7 @@ LIMIT 5; ## Storage Settings {#storage-settings} -- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. +- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. - [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default. diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 799eb31446a..080c9860519 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -10,14 +10,15 @@ Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) and Google **Syntax** ``` sql -s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [,structure]) +s3Cluster(cluster_name, source, [,access_key_id, secret_access_key, [session_token]] [,format] [,structure]) ``` **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. - `source` — URL to a file or a bunch of files. 
Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `session_token` - Session token to use with the given keys. Optional when passing keys. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index 49c4aade4e9..cd1297504af 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -493,7 +493,7 @@ catch (const DB::Exception & e) ``` cpp if (0 != close(fd)) - throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE); + throw ErrnoException(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file {}", file_name); ``` `assert` не используются. diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 7195ee38af6..9f223157ea7 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -369,6 +369,9 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT | [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | | [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | | [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [has](../../../sql-reference/functions/array-functions.md#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ | ✔ | +| [hasAny](../../../sql-reference/functions/array-functions.md#function-hasAny) | ✗ | ✗ | ✔ | ✔ | ✔ | ✗ | +| [hasAll](../../../sql-reference/functions/array-functions.md#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ | | hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | Функции с постоянным агрументом, который меньше, чем размер ngram не могут использовать индекс `ngrambf_v1` для оптимизации запроса. @@ -681,7 +684,6 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); - `disk` — диск, находящийся внутри тома. - `max_data_part_size_bytes` — максимальный размер куска данных, который может находиться на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том. - `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). Для перемещения куски сортируются по размеру от большего к меньшему (по убыванию) и выбираются куски, совокупный размер которых достаточен для соблюдения условия `move_factor`, если совокупный размер всех партов недостаточен, будут перемещены все парты. 
-- `prefer_not_to_merge` — Отключает слияние кусков данных, хранящихся на данном томе. Если данная настройка включена, то слияние данных, хранящихся на данном томе, не допускается. Это позволяет контролировать работу ClickHouse с медленными дисками. Примеры конфигураций: @@ -718,7 +720,6 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); external - true diff --git a/docs/ru/getting-started/example-datasets/criteo.md b/docs/ru/getting-started/example-datasets/criteo.md index 5ba55795632..4818e9e69d4 100644 --- a/docs/ru/getting-started/example-datasets/criteo.md +++ b/docs/ru/getting-started/example-datasets/criteo.md @@ -1,14 +1,14 @@ --- slug: /ru/getting-started/example-datasets/criteo sidebar_position: 18 -sidebar_label: "Терабайт логов кликов от Criteo" +sidebar_label: "Терабайтный журнал посещений сайта от Criteo" --- -# Терабайт логов кликов от Criteo {#terabait-logov-klikov-ot-criteo} +# Терабайтный журнал посещений сайта от Criteo {#terabaitnyi-zhurnal} Скачайте данные с http://labs.criteo.com/downloads/download-terabyte-click-logs/ -Создайте таблицу для импорта лога: +Создайте таблицу для импорта журнала: ``` sql CREATE TABLE criteo_log (date Date, clicked UInt8, int1 Int32, int2 Int32, int3 Int32, int4 Int32, int5 Int32, int6 Int32, int7 Int32, int8 Int32, int9 Int32, int10 Int32, int11 Int32, int12 Int32, int13 Int32, cat1 String, cat2 String, cat3 String, cat4 String, cat5 String, cat6 String, cat7 String, cat8 String, cat9 String, cat10 String, cat11 String, cat12 String, cat13 String, cat14 String, cat15 String, cat16 String, cat17 String, cat18 String, cat19 String, cat20 String, cat21 String, cat22 String, cat23 String, cat24 String, cat25 String, cat26 String) ENGINE = Log @@ -69,7 +69,7 @@ CREATE TABLE criteo ) ENGINE = MergeTree(date, intHash32(icat1), (date, intHash32(icat1)), 8192) ``` -Преобразуем данные из сырого лога и положим во вторую таблицу: +Преобразуйте импортированные данные, разложив их по таблице сконвертированных данных: ``` sql INSERT INTO criteo SELECT date, clicked, int1, int2, int3, int4, int5, int6, int7, int8, int9, int10, int11, int12, int13, reinterpretAsUInt32(unhex(cat1)) AS icat1, reinterpretAsUInt32(unhex(cat2)) AS icat2, reinterpretAsUInt32(unhex(cat3)) AS icat3, reinterpretAsUInt32(unhex(cat4)) AS icat4, reinterpretAsUInt32(unhex(cat5)) AS icat5, reinterpretAsUInt32(unhex(cat6)) AS icat6, reinterpretAsUInt32(unhex(cat7)) AS icat7, reinterpretAsUInt32(unhex(cat8)) AS icat8, reinterpretAsUInt32(unhex(cat9)) AS icat9, reinterpretAsUInt32(unhex(cat10)) AS icat10, reinterpretAsUInt32(unhex(cat11)) AS icat11, reinterpretAsUInt32(unhex(cat12)) AS icat12, reinterpretAsUInt32(unhex(cat13)) AS icat13, reinterpretAsUInt32(unhex(cat14)) AS icat14, reinterpretAsUInt32(unhex(cat15)) AS icat15, reinterpretAsUInt32(unhex(cat16)) AS icat16, reinterpretAsUInt32(unhex(cat17)) AS icat17, reinterpretAsUInt32(unhex(cat18)) AS icat18, reinterpretAsUInt32(unhex(cat19)) AS icat19, reinterpretAsUInt32(unhex(cat20)) AS icat20, reinterpretAsUInt32(unhex(cat21)) AS icat21, reinterpretAsUInt32(unhex(cat22)) AS icat22, reinterpretAsUInt32(unhex(cat23)) AS icat23, reinterpretAsUInt32(unhex(cat24)) AS icat24, reinterpretAsUInt32(unhex(cat25)) AS icat25, reinterpretAsUInt32(unhex(cat26)) AS icat26 FROM criteo_log; diff --git a/docs/ru/getting-started/example-datasets/github-events.mdx b/docs/ru/getting-started/example-datasets/github-events.mdx index c6e58a9f5a4..84f445074af 100644 --- a/docs/ru/getting-started/example-datasets/github-events.mdx +++ 
b/docs/ru/getting-started/example-datasets/github-events.mdx @@ -1,9 +1,9 @@ --- slug: /ru/getting-started/example-datasets/github-events sidebar_label: GitHub Events -title: "GitHub Events Dataset" +title: "Набор данных о событиях на GitHub" --- -import Content from '@site/docs/en/getting-started/example-datasets/github-events.md'; +Набор данных о событиях на GitHub с 2011 года по 6 декабря 2020 года содержит 3,1 млрд записей. Объём исходных данных — 75 ГБ, для загрузки в Clickhouse потребуется около 200 ГБ свободного пространства хранения (при использовании метода сжатия lz4). - +Полное описание набора, инструкции по загрузке и запросы к нему опубликованы на https://ghe.clickhouse.tech/ diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index e13ddd18030..2081dcc59b6 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2249,7 +2249,7 @@ SELECT * FROM test_table ## input_format_parallel_parsing {#input-format-parallel-parsing} -Включает или отключает режим, при котором входящие данные разбиваются на части, парсинг каждой из которых осуществляется параллельно с сохранением исходного порядка. Поддерживается только для форматов [TSV](../../interfaces/formats.md#tabseparated), [TKSV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) и [JSONEachRow](../../interfaces/formats.md#jsoneachrow). +Включает или отключает режим, при котором входящие данные разбиваются на части, парсинг каждой из которых осуществляется параллельно с сохранением исходного порядка. Поддерживается только для форматов [TSV](../../interfaces/formats.md#tabseparated), [TSKV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) и [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Возможные значения: @@ -2260,7 +2260,7 @@ SELECT * FROM test_table ## output_format_parallel_formatting {#output-format-parallel-formatting} -Включает или отключает режим, при котором исходящие данные форматируются параллельно с сохранением исходного порядка. Поддерживается только для форматов [TSV](../../interfaces/formats.md#tabseparated), [TKSV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) и [JSONEachRow](../../interfaces/formats.md#jsoneachrow). +Включает или отключает режим, при котором исходящие данные форматируются параллельно с сохранением исходного порядка. Поддерживается только для форматов [TSV](../../interfaces/formats.md#tabseparated), [TSKV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) и [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Возможные значения: diff --git a/docs/ru/operations/system-tables/storage_policies.md b/docs/ru/operations/system-tables/storage_policies.md index 8c3f117ca91..dbb91a8ec1a 100644 --- a/docs/ru/operations/system-tables/storage_policies.md +++ b/docs/ru/operations/system-tables/storage_policies.md @@ -13,6 +13,5 @@ slug: /ru/operations/system-tables/storage_policies - `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — имена дисков, содержащихся в политике хранения. - `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — максимальный размер куска данных, который может храниться на дисках тома (0 — без ограничений). - `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). 
-- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Значение настройки `prefer_not_to_merge`. Если данная настройка включена, то слияние данных, хранящихся на данном томе, не допускается. Это позволяет контролировать работу ClickHouse с медленными дисками. Если политика хранения содержит несколько томов, то каждому тому соответствует отдельная запись в таблице. diff --git a/docs/ru/operations/utilities/clickhouse-format.md b/docs/ru/operations/utilities/clickhouse-format.md index af66930b368..9c4b7304940 100644 --- a/docs/ru/operations/utilities/clickhouse-format.md +++ b/docs/ru/operations/utilities/clickhouse-format.md @@ -1,115 +1,115 @@ ---- +--- slug: /ru/operations/utilities/clickhouse-format -sidebar_position: 65 -sidebar_label: clickhouse-format ---- - -# clickhouse-format {#clickhouse-format} - -Позволяет форматировать входящие запросы. - -Ключи: - -- `--help` или`-h` — выводит описание ключей. -- `--query` — форматирует запрос любой длины и сложности. -- `--hilite` — добавляет подсветку синтаксиса с экранированием символов. -- `--oneline` — форматирование в одну строку. -- `--quiet` или `-q` — проверяет синтаксис без вывода результата. -- `--multiquery` or `-n` — поддерживает несколько запросов в одной строке. -- `--obfuscate` — обфусцирует вместо форматирования. -- `--seed <строка>` — задает строку, которая определяет результат обфускации. -- `--backslash` — добавляет обратный слеш в конце каждой строки отформатированного запроса. Удобно использовать если многострочный запрос скопирован из интернета или другого источника и его нужно выполнить из командной строки. - -## Примеры {#examples} - -1. Форматирование запроса: - -```bash -$ clickhouse-format --query "select number from numbers(10) where number%2 order by number desc;" -``` - -Результат: - -```text -SELECT number -FROM numbers(10) -WHERE number % 2 -ORDER BY number DESC -``` - -2. Подсветка синтаксиса и форматирование в одну строку: - -```bash -$ clickhouse-format --oneline --hilite <<< "SELECT sum(number) FROM numbers(5);" -``` - -Результат: - -```sql -SELECT sum(number) FROM numbers(5) -``` - -3. Несколько запросов в одной строке: - -```bash -$ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);" -``` - -Результат: - -```text -SELECT * -FROM -( - SELECT 1 AS x - UNION ALL - SELECT 1 - UNION DISTINCT - SELECT 3 -) -; -``` - -4. Обфускация: - -```bash -$ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;" -``` - -Результат: - -```text -SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END; -``` - -Тот же запрос с другой инициализацией обфускатора: - -```bash -$ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;" -``` - -Результат: - -```text -SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END; -``` - -5. Добавление обратного слеша: - -```bash -$ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);" -``` - -Результат: - -```text -SELECT * \ -FROM \ -( \ - SELECT 1 AS x \ - UNION ALL \ - SELECT 1 \ - UNION DISTINCT \ - SELECT 3 \ -) -``` +sidebar_position: 65 +sidebar_label: clickhouse-format +--- + +# clickhouse-format {#clickhouse-format} + +Позволяет форматировать входящие запросы. 
+ +Ключи: + +- `--help` или`-h` — выводит описание ключей. +- `--query` — форматирует запрос любой длины и сложности. +- `--hilite` — добавляет подсветку синтаксиса с экранированием символов. +- `--oneline` — форматирование в одну строку. +- `--quiet` или `-q` — проверяет синтаксис без вывода результата. +- `--multiquery` or `-n` — поддерживает несколько запросов в одной строке. +- `--obfuscate` — обфусцирует вместо форматирования. +- `--seed <строка>` — задает строку, которая определяет результат обфускации. +- `--backslash` — добавляет обратный слеш в конце каждой строки отформатированного запроса. Удобно использовать если многострочный запрос скопирован из интернета или другого источника и его нужно выполнить из командной строки. + +## Примеры {#examples} + +1. Форматирование запроса: + +```bash +$ clickhouse-format --query "select number from numbers(10) where number%2 order by number desc;" +``` + +Результат: + +```text +SELECT number +FROM numbers(10) +WHERE number % 2 +ORDER BY number DESC +``` + +2. Подсветка синтаксиса и форматирование в одну строку: + +```bash +$ clickhouse-format --oneline --hilite <<< "SELECT sum(number) FROM numbers(5);" +``` + +Результат: + +```sql +SELECT sum(number) FROM numbers(5) +``` + +3. Несколько запросов в одной строке: + +```bash +$ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);" +``` + +Результат: + +```text +SELECT * +FROM +( + SELECT 1 AS x + UNION ALL + SELECT 1 + UNION DISTINCT + SELECT 3 +) +; +``` + +4. Обфускация: + +```bash +$ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;" +``` + +Результат: + +```text +SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END; +``` + +Тот же запрос с другой инициализацией обфускатора: + +```bash +$ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;" +``` + +Результат: + +```text +SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END; +``` + +5. Добавление обратного слеша: + +```bash +$ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);" +``` + +Результат: + +```text +SELECT * \ +FROM \ +( \ + SELECT 1 AS x \ + UNION ALL \ + SELECT 1 \ + UNION DISTINCT \ + SELECT 3 \ +) +``` diff --git a/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md index 958a4bd3504..3b36ee04095 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md @@ -1,62 +1,62 @@ ---- -slug: /ru/sql-reference/aggregate-functions/reference/sparkbar -sidebar_position: 311 -sidebar_label: sparkbar ---- - -# sparkbar {#sparkbar} - -Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`. Повторения для всех `x`, попавших в один бакет, усредняются, поэтому данные должны быть предварительно агрегированы. Отрицательные повторения игнорируются. - -Если интервал для построения не указан, то в качестве нижней границы интервала будет взято минимальное значение `x`, а в качестве верхней границы — максимальное значение `x`. -Значения `x` вне указанного интервала игнорируются. 
- - -**Синтаксис** - -``` sql -sparkbar(width[, min_x, max_x])(x, y) -``` - -**Параметры** - -- `width` — Количество столбцов гистограммы. Тип: [Integer](../../../sql-reference/data-types/int-uint.md). - -- `min_x` — Начало интервала. Необязательный параметр. -- `max_x` — Конец интервала. Необязательный параметр. - -**Аргументы** - -- `x` — Поле со значениями. -- `y` — Поле с частотой повторения значений. - - -**Возвращаемые значения** - -- Гистограмма частот. - -**Пример** - -Запрос: - -``` sql -CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date; - -INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11'); - -SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date); - -SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date); -``` - -Результат: - -``` text -┌─sparkbar(9)(event_date, cnt)─┐ -│ ▂▅▂▃▆█ ▂ │ -└──────────────────────────────┘ - -┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐ -│ ▂▅▂▃▇▆█ │ -└──────────────────────────────────────────────────────────────────────────┘ -``` +--- +slug: /ru/sql-reference/aggregate-functions/reference/sparkbar +sidebar_position: 311 +sidebar_label: sparkbar +--- + +# sparkbar {#sparkbar} + +Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`. Повторения для всех `x`, попавших в один бакет, усредняются, поэтому данные должны быть предварительно агрегированы. Отрицательные повторения игнорируются. + +Если интервал для построения не указан, то в качестве нижней границы интервала будет взято минимальное значение `x`, а в качестве верхней границы — максимальное значение `x`. +Значения `x` вне указанного интервала игнорируются. + + +**Синтаксис** + +``` sql +sparkbar(width[, min_x, max_x])(x, y) +``` + +**Параметры** + +- `width` — Количество столбцов гистограммы. Тип: [Integer](../../../sql-reference/data-types/int-uint.md). + +- `min_x` — Начало интервала. Необязательный параметр. +- `max_x` — Конец интервала. Необязательный параметр. + +**Аргументы** + +- `x` — Поле со значениями. +- `y` — Поле с частотой повторения значений. + + +**Возвращаемые значения** + +- Гистограмма частот. 
+ +**Пример** + +Запрос: + +``` sql +CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date; + +INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11'); + +SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date); + +SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date); +``` + +Результат: + +``` text +┌─sparkbar(9)(event_date, cnt)─┐ +│ ▂▅▂▃▆█ ▂ │ +└──────────────────────────────┘ + +┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐ +│ ▂▅▂▃▇▆█ │ +└──────────────────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md index 7c709619679..5331cf00728 100644 --- a/docs/ru/sql-reference/table-functions/file.md +++ b/docs/ru/sql-reference/table-functions/file.md @@ -76,14 +76,16 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U ## Шаблоны поиска в компонентах пути {#globs-in-path} -При описании пути к файлу могут использоваться шаблоны поиска. Обрабатываются только те файлы, у которых путь и название соответствуют шаблону полностью (а не только префикс или суффикс). +Путь к файлу может содержать шаблоны в режиме доступа только для чтения. +Шаблоны могут содержаться в разных частях пути. +Обрабатываться будут те и только те файлы, которые существуют в файловой системе и удовлетворяют всему шаблону пути. - `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов. - `?` — заменяет ровно один любой символ. - `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. Эти строки также могут содержать символ `/`. - `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули). -Конструкция с `{}` аналогична табличной функции [remote](remote.md). +Конструкция с `{}` аналогична табличным функциям [remote](remote.md), [hdfs](hdfs.md). **Пример** diff --git a/docs/ru/sql-reference/table-functions/fileCluster.md b/docs/ru/sql-reference/table-functions/fileCluster.md index 7385f4859dc..2f2145b6dba 100644 --- a/docs/ru/sql-reference/table-functions/fileCluster.md +++ b/docs/ru/sql-reference/table-functions/fileCluster.md @@ -44,7 +44,7 @@ $ cat /var/lib/clickhouse/user_files/test1.csv 1,"file1" 11,"file11" -$ cat /var/lib/clickhouse/user_files/test1.csv +$ cat /var/lib/clickhouse/user_files/test2.csv 2,"file2" 22,"file22" ``` diff --git a/docs/ru/sql-reference/table-functions/hdfs.md b/docs/ru/sql-reference/table-functions/hdfs.md index b70de5e3a4f..6dcb1a21791 100644 --- a/docs/ru/sql-reference/table-functions/hdfs.md +++ b/docs/ru/sql-reference/table-functions/hdfs.md @@ -14,7 +14,7 @@ hdfs(URI, format, structure) **Входные параметры** -- `URI` — URI файла в HDFS. Путь к файлу поддерживает следующие шаблоны в режиме доступа только для чтения `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, \``'abc', 'def'` — строки. +- `URI` — URI файла в HDFS. - `format` — [формат](../../interfaces/formats.md#formats) файла. - `structure` — структура таблицы. 
Формат `'column1_name column1_type, column2_name column2_type, ...'`. @@ -41,19 +41,22 @@ LIMIT 2 ## Шаблоны поиска в компонентах пути {#globs-in-path} -- `*` — Заменяет любое количество любых символов кроме `/`, включая отсутствие символов. +Путь к файлу может содержать шаблоны в режиме доступа только для чтения. +Шаблоны могут содержаться в разных частях пути. +Обрабатываться будут те и только те файлы, которые существуют в файловой системе и удовлетворяют всему шаблону пути. + + +- `*` — Заменяет любое количество любых символов (кроме `/`), включая отсутствие символов. - `?` — Заменяет ровно один любой символ. - `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. Эти строки также могут содержать символ `/`. - `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули). -Конструкция с `{}` аналогична табличной функции [remote](remote.md). +Конструкция с `{}` аналогична табличной функции [remote](remote.md), [file](file.md). :::danger Предупреждение -Если ваш список файлов содержит интервал с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры по отдельности или используйте `?`. +Если ваш список файлов содержит интервал с ведущими нулями, используйте отдельную конструкцию с фигурными скобками для каждой цифры или используйте `?`. ::: -Шаблоны могут содержаться в разных частях пути. Обрабатываться будут ровно те файлы, которые и удовлетворяют всему шаблону пути, и существуют в файловой системе. - ## Виртуальные столбцы {#virtualnye-stolbtsy} - `_path` — Путь к файлу. diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 7deef68f47f..fe40cb0c507 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -11,7 +11,7 @@ sidebar_label: s3 **Синтаксис** ``` sql -s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +s3(path [,access_key_id, secret_access_key [,session_token]] [,format] [,structure] [,compression]) ``` **Aргументы** diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md index b8f34d805ff..b382bf5e384 100644 --- a/docs/ru/sql-reference/table-functions/s3Cluster.md +++ b/docs/ru/sql-reference/table-functions/s3Cluster.md @@ -11,14 +11,14 @@ sidebar_label: s3Cluster **Синтаксис** ``` sql -s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [,structure]) +s3Cluster(cluster_name, source, [,access_key_id, secret_access_key [,session_token]] [,format] [,structure]) ``` **Аргументы** - `cluster_name` — имя кластера, используемое для создания набора адресов и параметров подключения к удаленным и локальным серверам. - `source` — URL файла или нескольких файлов. Поддерживает следующие символы подстановки: `*`, `?`, `{'abc','def'}` и `{N..M}`, где `N`, `M` — числа, `abc`, `def` — строки. Подробнее смотрите в разделе [Символы подстановки](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `access_key_id` и `secret_access_key` — ключи, указывающие на учетные данные для использования с точкой приема запроса. Необязательные параметры. +- `access_key_id`, `secret_access_key` и `session_token` — ключи, указывающие на учетные данные для использования с точкой приема запроса. Необязательные параметры. - `format` — [формат](../../interfaces/formats.md#formats) файла. - `structure` — структура таблицы. 
Формат `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/zh/development/style.md b/docs/zh/development/style.md index 977b4dce92a..c0a08291e02 100644 --- a/docs/zh/development/style.md +++ b/docs/zh/development/style.md @@ -485,7 +485,7 @@ catch (const DB::Exception & e) ``` cpp if (0 != close(fd)) - throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE); + throw ErrnoException(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file {}", file_name); ``` `不要使用断言`。 diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index c738ae0f24c..bfa69338657 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -1,821 +1,822 @@ ---- -slug: /zh/engines/table-engines/mergetree-family/mergetree ---- -# MergeTree {#table_engines-mergetree} - -Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及该系列(`*MergeTree`)中的其他引擎。 - -`MergeTree` 系列的引擎被设计用于插入极大量的数据到一张表当中。数据可以以数据片段的形式一个接着一个的快速写入,数据片段在后台按照一定的规则进行合并。相比在插入时不断修改(重写)已存储的数据,这种策略会高效很多。 - -主要特点: - -- 存储的数据按主键排序。 - - 这使得您能够创建一个小型的稀疏索引来加快数据检索。 - -- 如果指定了 [分区键](custom-partitioning-key.md) 的话,可以使用分区。 - - 在相同数据集和相同结果集的情况下 ClickHouse 中某些带分区的操作会比普通操作更快。查询中指定了分区键时 ClickHouse 会自动截取分区数据。这也有效增加了查询性能。 - -- 支持数据副本。 - - `ReplicatedMergeTree` 系列的表提供了数据副本功能。更多信息,请参阅 [数据副本](replication.md) 一节。 - -- 支持数据采样。 - - 需要的话,您可以给表设置一个采样方法。 - -:::info -[合并](../special/merge.md#merge) 引擎并不属于 `*MergeTree` 系列。 -::: - -## 建表 {#table_engine-mergetree-creating-a-table} - -``` sql -CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] -( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], - ... - INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1, - INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2 -) ENGINE = MergeTree() -ORDER BY expr -[PARTITION BY expr] -[PRIMARY KEY expr] -[SAMPLE BY expr] -[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...] -[SETTINGS name=value, ...] -``` - -对于以上参数的描述,可参考 [CREATE 语句 的描述](../../../engines/table-engines/mergetree-family/mergetree.md) 。 - - - -**子句** - -- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`. `MergeTree` 引擎没有参数。 - -- `ORDER BY` — 排序键。 - - 可以是一组列的元组或任意的表达式。 例如: `ORDER BY (CounterID, EventDate)` 。 - - 如果没有使用 `PRIMARY KEY` 显式指定的主键,ClickHouse 会使用排序键作为主键。 - - 如果不需要排序,可以使用 `ORDER BY tuple()`. 
参考 [选择主键](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/#selecting-the-primary-key) - -- `PARTITION BY` — [分区键](custom-partitioning-key.md) ,可选项。 - - 大多数情况下,不需要使用分区键。即使需要使用,也不需要使用比月更细粒度的分区键。分区不会加快查询(这与 ORDER BY 表达式不同)。永远也别使用过细粒度的分区键。不要使用客户端指定分区标识符或分区字段名称来对数据进行分区(而是将分区字段标识或名称作为 ORDER BY 表达式的第一列来指定分区)。 - - 要按月分区,可以使用表达式 `toYYYYMM(date_column)` ,这里的 `date_column` 是一个 [Date](../../../engines/table-engines/mergetree-family/mergetree.md) 类型的列。分区名的格式会是 `"YYYYMM"` 。 - -- `PRIMARY KEY` - 如果要 [选择与排序键不同的主键](#choosing-a-primary-key-that-differs-from-the-sorting-key),在这里指定,可选项。 - - 默认情况下主键跟排序键(由 `ORDER BY` 子句指定)相同。 - 因此,大部分情况下不需要再专门指定一个 `PRIMARY KEY` 子句。 - -- `SAMPLE BY` - 用于抽样的表达式,可选项。 - - 如果要用抽样表达式,主键中必须包含这个表达式。例如: - `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))` 。 - -- `TTL` - 指定行存储的持续时间并定义数据片段在硬盘和卷上的移动逻辑的规则列表,可选项。 - - 表达式中必须存在至少一个 `Date` 或 `DateTime` 类型的列,比如: - - `TTL date + INTERVAl 1 DAY` - - 规则的类型 `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'`指定了当满足条件(到达指定时间)时所要执行的动作:移除过期的行,还是将数据片段(如果数据片段中的所有行都满足表达式的话)移动到指定的磁盘(`TO DISK 'xxx'`) 或 卷(`TO VOLUME 'xxx'`)。默认的规则是移除(`DELETE`)。可以在列表中指定多个规则,但最多只能有一个`DELETE`的规则。 - - 更多细节,请查看 [表和列的 TTL](#table_engine-mergetree-ttl) - -- `SETTINGS` — 控制 `MergeTree` 行为的额外参数,可选项: - - - `index_granularity` — 索引粒度。索引中相邻的『标记』间的数据行数。默认值8192 。参考[数据存储](#mergetree-data-storage)。 - - `index_granularity_bytes` — 索引粒度,以字节为单位,默认值: 10Mb。如果想要仅按数据行数限制索引粒度, 请设置为0(不建议)。 - - `min_index_granularity_bytes` - 允许的最小数据粒度,默认值:1024b。该选项用于防止误操作,添加了一个非常低索引粒度的表。参考[数据存储](#mergetree-data-storage) - - `enable_mixed_granularity_parts` — 是否启用通过 `index_granularity_bytes` 控制索引粒度的大小。在19.11版本之前, 只有 `index_granularity` 配置能够用于限制索引粒度的大小。当从具有很大的行(几十上百兆字节)的表中查询数据时候,`index_granularity_bytes` 配置能够提升ClickHouse的性能。如果您的表里有很大的行,可以开启这项配置来提升`SELECT` 查询的性能。 - - `use_minimalistic_part_header_in_zookeeper` — ZooKeeper中数据片段存储方式 。如果`use_minimalistic_part_header_in_zookeeper=1` ,ZooKeeper 会存储更少的数据。更多信息参考[服务配置参数]([Server Settings | ClickHouse Documentation](https://clickhouse.com/docs/zh/operations/server-configuration-parameters/settings/))这章中的 [设置描述](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) 。 - - `min_merge_bytes_to_use_direct_io` — 使用直接 I/O 来操作磁盘的合并操作时要求的最小数据量。合并数据片段时,ClickHouse 会计算要被合并的所有数据的总存储空间。如果大小超过了 `min_merge_bytes_to_use_direct_io` 设置的字节数,则 ClickHouse 将使用直接 I/O 接口(`O_DIRECT` 选项)对磁盘读写。如果设置 `min_merge_bytes_to_use_direct_io = 0` ,则会禁用直接 I/O。默认值:`10 * 1024 * 1024 * 1024` 字节。 - - - `merge_with_ttl_timeout` — TTL合并频率的最小间隔时间,单位:秒。默认值: 86400 (1 天)。 - - `write_final_mark` — 是否启用在数据片段尾部写入最终索引标记。默认值: 1(不要关闭)。 - - `merge_max_block_size` — 在块中进行合并操作时的最大行数限制。默认值:8192 - - `storage_policy` — 存储策略。 参见 [使用具有多个块的设备进行数据存储](#table_engine-mergetree-multiple-volumes). 
- - `min_bytes_for_wide_part`,`min_rows_for_wide_part` 在数据片段中可以使用`Wide`格式进行存储的最小字节数/行数。您可以不设置、只设置一个,或全都设置。参考:[数据存储](#mergetree-data-storage) - - `max_parts_in_total` - 所有分区中最大块的数量(意义不明) - - `max_compress_block_size` - 在数据压缩写入表前,未压缩数据块的最大大小。您可以在全局设置中设置该值(参见[max_compress_block_size](https://clickhouse.com/docs/zh/operations/settings/settings/#max-compress-block-size))。建表时指定该值会覆盖全局设置。 - - `min_compress_block_size` - 在数据压缩写入表前,未压缩数据块的最小大小。您可以在全局设置中设置该值(参见[min_compress_block_size](https://clickhouse.com/docs/zh/operations/settings/settings/#min-compress-block-size))。建表时指定该值会覆盖全局设置。 - - `max_partitions_to_read` - 一次查询中可访问的分区最大数。您可以在全局设置中设置该值(参见[max_partitions_to_read](https://clickhouse.com/docs/zh/operations/settings/settings/#max_partitions_to_read))。 - -**示例配置** - -``` sql -ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192 -``` - -在这个例子中,我们设置了按月进行分区。 - -同时我们设置了一个按用户 ID 哈希的抽样表达式。这使得您可以对该表中每个 `CounterID` 和 `EventDate` 的数据伪随机分布。如果您在查询时指定了 [SAMPLE](../../../engines/table-engines/mergetree-family/mergetree.md#select-sample-clause) 子句。 ClickHouse会返回对于用户子集的一个均匀的伪随机数据采样。 - -`index_granularity` 可省略因为 8192 是默认设置 。 - -
-已弃用的建表方法 - -:::attention "注意" -不要在新版项目中使用该方法,可能的话,请将旧项目切换到上述方法。 -::: - -``` sql -CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] -( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], - ... -) ENGINE [=] MergeTree(date-column [, sampling_expression], (primary, key), index_granularity) -``` - -**MergeTree() 参数** - -- `date-column` — 类型为 [日期](../../../engines/table-engines/mergetree-family/mergetree.md) 的列名。ClickHouse 会自动依据这个列按月创建分区。分区名格式为 `"YYYYMM"` 。 -- `sampling_expression` — 采样表达式。 -- `(primary, key)` — 主键。类型 — [元组()](../../../engines/table-engines/mergetree-family/mergetree.md) -- `index_granularity` — 索引粒度。即索引中相邻『标记』间的数据行数。设为 8192 可以适用大部分场景。 - -**示例** - - MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID)), 8192) - -对于主要的配置方法,这里 `MergeTree` 引擎跟前面的例子一样,可以以同样的方式配置。 -
- -## 数据存储 {#mergetree-data-storage} - -表由按主键排序的数据片段(DATA PART)组成。 - -当数据被插入到表中时,会创建多个数据片段并按主键的字典序排序。例如,主键是 `(CounterID, Date)` 时,片段中数据首先按 `CounterID` 排序,具有相同 `CounterID` 的部分按 `Date` 排序。 - -不同分区的数据会被分成不同的片段,ClickHouse 在后台合并数据片段以便更高效存储。不同分区的数据片段不会进行合并。合并机制并不保证具有相同主键的行全都合并到同一个数据片段中。 - -数据片段可以以 `Wide` 或 `Compact` 格式存储。在 `Wide` 格式下,每一列都会在文件系统中存储为单独的文件,在 `Compact` 格式下所有列都存储在一个文件中。`Compact` 格式可以提高插入量少插入频率频繁时的性能。 - -数据存储格式由 `min_bytes_for_wide_part` 和 `min_rows_for_wide_part` 表引擎参数控制。如果数据片段中的字节数或行数少于相应的设置值,数据片段会以 `Compact` 格式存储,否则会以 `Wide` 格式存储。 - -每个数据片段被逻辑的分割成颗粒(granules)。颗粒是 ClickHouse 中进行数据查询时的最小不可分割数据集。ClickHouse 不会对行或值进行拆分,所以每个颗粒总是包含整数个行。每个颗粒的第一行通过该行的主键值进行标记, -ClickHouse 会为每个数据片段创建一个索引文件来存储这些标记。对于每列,无论它是否包含在主键当中,ClickHouse 都会存储类似标记。这些标记让您可以在列文件中直接找到数据。 - -颗粒的大小通过表引擎参数 `index_granularity` 和 `index_granularity_bytes` 控制。颗粒的行数的在 `[1, index_granularity]` 范围中,这取决于行的大小。如果单行的大小超过了 `index_granularity_bytes` 设置的值,那么一个颗粒的大小会超过 `index_granularity_bytes`。在这种情况下,颗粒的大小等于该行的大小。 - -## 主键和索引在查询中的表现 {#primary-keys-and-indexes-in-queries} - -我们以 `(CounterID, Date)` 以主键。排序好的索引的图示会是下面这样: - -``` text - 全部数据 : [-------------------------------------------------------------------------] - CounterID: [aaaaaaaaaaaaaaaaaabbbbcdeeeeeeeeeeeeefgggggggghhhhhhhhhiiiiiiiiikllllllll] - Date: [1111111222222233331233211111222222333211111112122222223111112223311122333] - 标记: | | | | | | | | | | | - a,1 a,2 a,3 b,3 e,2 e,3 g,1 h,2 i,1 i,3 l,3 - 标记号: 0 1 2 3 4 5 6 7 8 9 10 -``` - -如果指定查询如下: - -- `CounterID in ('a', 'h')`,服务器会读取标记号在 `[0, 3)` 和 `[6, 8)` 区间中的数据。 -- `CounterID IN ('a', 'h') AND Date = 3`,服务器会读取标记号在 `[1, 3)` 和 `[7, 8)` 区间中的数据。 -- `Date = 3`,服务器会读取标记号在 `[1, 10]` 区间中的数据。 - -上面例子可以看出使用索引通常会比全表描述要高效。 - -稀疏索引会引起额外的数据读取。当读取主键单个区间范围的数据时,每个数据块中最多会多读 `index_granularity * 2` 行额外的数据。 - -稀疏索引使得您可以处理极大量的行,因为大多数情况下,这些索引常驻于内存。 - -ClickHouse 不要求主键唯一,所以您可以插入多条具有相同主键的行。 - -您可以在`PRIMARY KEY`与`ORDER BY`条件中使用`可为空的`类型的表达式,但强烈建议不要这么做。为了启用这项功能,请打开[allow_nullable_key](../../../operations/settings/index.md#allow-nullable-key),[NULLS_LAST](../../../sql-reference/statements/select/order-by.md#sorting-of-special-values)规则也适用于`ORDER BY`条件中有NULL值的情况下。 - -### 主键的选择 {#zhu-jian-de-xuan-ze} - -主键中列的数量并没有明确的限制。依据数据结构,您可以在主键包含多些或少些列。这样可以: - - - 改善索引的性能。 - - - 如果当前主键是 `(a, b)` ,在下列情况下添加另一个 `c` 列会提升性能: - - - 查询会使用 `c` 列作为条件 - - 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让您的查询略过很长的数据范围。 - - - 改善数据压缩。 - - ClickHouse 以主键排序片段数据,所以,数据的一致性越高,压缩越好。 - - - 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。 - - 在这种情况下,指定与主键不同的 *排序键* 也是有意义的。 - -长的主键会对插入性能和内存消耗有负面影响,但主键中额外的列并不影响 `SELECT` 查询的性能。 - -可以使用 `ORDER BY tuple()` 语法创建没有主键的表。在这种情况下 ClickHouse 根据数据插入的顺序存储。如果在使用 `INSERT ... 
SELECT` 时希望保持数据的排序,请设置 [max_insert_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads)。 - -想要根据初始顺序进行数据查询,使用 [单线程查询](../../../operations/settings/settings.md#settings-max_threads) - -### 选择与排序键不同的主键 {#choosing-a-primary-key-that-differs-from-the-sorting-key} - -Clickhouse可以做到指定一个跟排序键不一样的主键,此时排序键用于在数据片段中进行排序,主键用于在索引文件中进行标记的写入。这种情况下,主键表达式元组必须是排序键表达式元组的前缀(即主键为(a,b),排序列必须为(a,b,******))。 - -当使用 [SummingMergeTree](summingmergetree.md) 和 [AggregatingMergeTree](aggregatingmergetree.md) 引擎时,这个特性非常有用。通常在使用这类引擎时,表里的列分两种:*维度* 和 *度量* 。典型的查询会通过任意的 `GROUP BY` 对度量列进行聚合并通过维度列进行过滤。由于 SummingMergeTree 和 AggregatingMergeTree 会对排序键相同的行进行聚合,所以把所有的维度放进排序键是很自然的做法。但这将导致排序键中包含大量的列,并且排序键会伴随着新添加的维度不断的更新。 - -在这种情况下合理的做法是,只保留少量的列在主键当中用于提升扫描效率,将维度列添加到排序键中。 - -对排序键进行 [ALTER](../../../sql-reference/statements/alter.md) 是轻量级的操作,因为当一个新列同时被加入到表里和排序键里时,已存在的数据片段并不需要修改。由于旧的排序键是新排序键的前缀,并且新添加的列中没有数据,因此在表修改时的数据对于新旧的排序键来说都是有序的。 - -### 索引和分区在查询中的应用 {#use-of-indexes-and-partitions-in-queries} - -对于 `SELECT` 查询,ClickHouse 分析是否可以使用索引。如果 `WHERE/PREWHERE` 子句具有下面这些表达式(作为完整WHERE条件的一部分或全部)则可以使用索引:进行相等/不相等的比较;对主键列或分区列进行`IN`运算、有固定前缀的`LIKE`运算(如name like 'test%')、函数运算(部分函数适用),还有对上述表达式进行逻辑运算。 - - -因此,在索引键的一个或多个区间上快速地执行查询是可能的。下面例子中,指定标签;指定标签和日期范围;指定标签和日期;指定多个标签和日期范围等执行查询,都会非常快。 - -当引擎配置如下时: - -``` sql - ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate) SETTINGS index_granularity=8192 -``` - -这种情况下,这些查询: - -``` sql -SELECT count() FROM table WHERE EventDate = toDate(now()) AND CounterID = 34 -SELECT count() FROM table WHERE EventDate = toDate(now()) AND (CounterID = 34 OR CounterID = 42) -SELECT count() FROM table WHERE ((EventDate >= toDate('2014-01-01') AND EventDate <= toDate('2014-01-31')) OR EventDate = toDate('2014-05-01')) AND CounterID IN (101500, 731962, 160656) AND (CounterID = 101500 OR EventDate != toDate('2014-05-01')) -``` - -ClickHouse 会依据主键索引剪掉不符合的数据,依据按月分区的分区键剪掉那些不包含符合数据的分区。 - -上文的查询显示,即使索引用于复杂表达式,因为读表操作经过优化,所以使用索引不会比完整扫描慢。 - -下面这个例子中,不会使用索引。 - -``` sql -SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' -``` - -要检查 ClickHouse 执行一个查询时能否使用索引,可设置 [force_index_by_date](../../../operations/settings/settings.md#settings-force_index_by_date) 和 [force_primary_key](../../../operations/settings/settings.md) 。 - -使用按月分区的分区列允许只读取包含适当日期区间的数据块,这种情况下,数据块会包含很多天(最多整月)的数据。在块中,数据按主键排序,主键第一列可能不包含日期。因此,仅使用日期而没有用主键字段作为条件的查询将会导致需要读取超过这个指定日期以外的数据。 - -### 部分单调主键的使用 - -考虑这样的场景,比如一个月中的天数。它们在一个月的范围内形成一个[单调序列](https://zh.wikipedia.org/wiki/单调函数) ,但如果扩展到更大的时间范围它们就不再单调了。这就是一个部分单调序列。如果用户使用部分单调的主键创建表,ClickHouse同样会创建一个稀疏索引。当用户从这类表中查询数据时,ClickHouse 会对查询条件进行分析。如果用户希望获取两个索引标记之间的数据并且这两个标记在一个月以内,ClickHouse 可以在这种特殊情况下使用到索引,因为它可以计算出查询参数与索引标记之间的距离。 - -如果查询参数范围内的主键不是单调序列,那么 ClickHouse 无法使用索引。在这种情况下,ClickHouse 会进行全表扫描。 - -ClickHouse 在任何主键代表一个部分单调序列的情况下都会使用这个逻辑。 - -### 跳数索引 {#tiao-shu-suo-yin-fen-duan-hui-zong-suo-yin-shi-yan-xing-de} - -此索引在 `CREATE` 语句的列部分里定义。 - -``` sql -INDEX index_name expr TYPE type(...) GRANULARITY granularity_value -``` - -`*MergeTree` 系列的表可以指定跳数索引。 -跳数索引是指数据片段按照粒度(建表时指定的`index_granularity`)分割成小块后,将上述SQL的granularity_value数量的小块组合成一个大的块,对这些大块写入索引信息,这样有助于使用`where`筛选时跳过大量不必要的数据,减少`SELECT`需要读取的数据量。 - -**示例** - -``` sql -CREATE TABLE table_name -( - u64 UInt64, - i32 Int32, - s String, - ... - INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3, - INDEX b (u64 * length(s)) TYPE set(1000) GRANULARITY 4 -) ENGINE = MergeTree() -... 
-``` - -上例中的索引能让 ClickHouse 执行下面这些查询时减少读取数据量。 - -``` sql -SELECT count() FROM table WHERE s < 'z' -SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 -``` - -#### 可用的索引类型 {#table_engine-mergetree-data_skipping-indexes} - -- `minmax` - 存储指定表达式的极值(如果表达式是 `tuple` ,则存储 `tuple` 中每个元素的极值),这些信息用于跳过数据块,类似主键。 - -- `set(max_rows)` - 存储指定表达式的不重复值(不超过 `max_rows` 个,`max_rows=0` 则表示『无限制』)。这些信息可用于检查数据块是否满足 `WHERE` 条件。 - -- `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` - 存储一个包含数据块中所有 n元短语(ngram) 的 [布隆过滤器](https://en.wikipedia.org/wiki/Bloom_filter) 。只可用在字符串上。 - 可用于优化 `equals` , `like` 和 `in` 表达式的性能。 - - `n` – 短语长度。 - - `size_of_bloom_filter_in_bytes` – 布隆过滤器大小,字节为单位。(因为压缩得好,可以指定比较大的值,如 256 或 512)。 - - `number_of_hash_functions` – 布隆过滤器中使用的哈希函数的个数。 - - `random_seed` – 哈希函数的随机种子。 - -- `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` - 跟 `ngrambf_v1` 类似,但是存储的是token而不是ngrams。Token是由非字母数字的符号分割的序列。 - -- `bloom_filter(bloom_filter([false_positive])` – 为指定的列存储布隆过滤器 - - 可选参数`false_positive`用来指定从布隆过滤器收到错误响应的几率。取值范围是 (0,1),默认值:0.025 - - 支持的数据类型:`Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`。 - - 以下函数会用到这个索引: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md) - -``` sql -INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 -INDEX sample_index2 (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4 -INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4 -``` - -#### 函数支持 {#functions-support} - -WHERE 子句中的条件可以包含对某列数据进行运算的函数表达式,如果列是索引的一部分,ClickHouse会在执行函数时尝试使用索引。不同的函数对索引的支持是不同的。 - -`set` 索引会对所有函数生效,其他索引对函数的生效情况见下表 - -| 函数 (操作符) / 索引 | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | -| ------------------------------------------------------------ | ----------- | ------ | ---------- | ---------- | ------------ | -| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#equals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notEquals(!=, <>)](../../../sql-reference/functions/comparison-functions.md#notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | -| [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | -| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | -| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [less (\<)](../../../sql-reference/functions/comparison-functions.md#less) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#greater) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greaterOrEquals 
(\>=)](../../../sql-reference/functions/comparison-functions.md#greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | - -常量参数小于 ngram 大小的函数不能使用 `ngrambf_v1` 进行查询优化。 - -:::note -布隆过滤器可能会包含不符合条件的匹配,所以 `ngrambf_v1`, `tokenbf_v1` 和 `bloom_filter` 索引不能用于结果返回为假的函数,例如: - -- 可以用来优化的场景 - - `s LIKE '%test%'` - - `NOT s NOT LIKE '%test%'` - - `s = 1` - - `NOT s != 1` - - `startsWith(s, 'test')` -- 不能用来优化的场景 - - `NOT s LIKE '%test%'` - - `s NOT LIKE '%test%'` - - `NOT s = 1` - - `s != 1` - - `NOT startsWith(s, 'test')` -::: - -## 并发数据访问 {#concurrent-data-access} - -对于表的并发访问,我们使用多版本机制。换言之,当一张表同时被读和更新时,数据从当前查询到的一组片段中读取。没有冗长的的锁。插入不会阻碍读取。 - -对表的读操作是自动并行的。 - -## 列和表的 TTL {#table_engine-mergetree-ttl} - -TTL用于设置值的生命周期,它既可以为整张表设置,也可以为每个列字段单独设置。表级别的 TTL 还会指定数据在磁盘和卷上自动转移的逻辑。 - -TTL 表达式的计算结果必须是 [日期](../../../engines/table-engines/mergetree-family/mergetree.md) 或 [日期时间](../../../engines/table-engines/mergetree-family/mergetree.md) 类型的字段。 - -示例: - -``` sql -TTL time_column -TTL time_column + interval -``` - -要定义`interval`, 需要使用 [时间间隔](../../../engines/table-engines/mergetree-family/mergetree.md#operators-datetime) 操作符。 - -``` sql -TTL date_time + INTERVAL 1 MONTH -TTL date_time + INTERVAL 15 HOUR -``` - -### 列 TTL {#mergetree-column-ttl} - -当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期,则ClickHouse 会从文件系统中的数据片段中删除此列。 - -`TTL`子句不能被用于主键字段。 - -**示例:** - -创建表时指定 `TTL` - -``` sql -CREATE TABLE example_table -( - d DateTime, - a Int TTL d + INTERVAL 1 MONTH, - b Int TTL d + INTERVAL 1 MONTH, - c String -) -ENGINE = MergeTree -PARTITION BY toYYYYMM(d) -ORDER BY d; -``` - -为表中已存在的列字段添加 `TTL` - -``` sql -ALTER TABLE example_table - MODIFY COLUMN - c String TTL d + INTERVAL 1 DAY; -``` - -修改列字段的 `TTL` - -``` sql -ALTER TABLE example_table - MODIFY COLUMN - c String TTL d + INTERVAL 1 MONTH; -``` - -### 表 TTL {#mergetree-table-ttl} - -表可以设置一个用于移除过期行的表达式,以及多个用于在磁盘或卷上自动转移数据片段的表达式。当表中的行过期时,ClickHouse 会删除所有对应的行。对于数据片段的转移特性,必须所有的行都满足转移条件。 - -``` sql -TTL expr - [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ... - [WHERE conditions] - [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] - -``` - -TTL 规则的类型紧跟在每个 TTL 表达式后面,它会影响满足表达式时(到达指定时间时)应当执行的操作: - -- `DELETE` - 删除过期的行(默认操作); -- `TO DISK 'aaa'` - 将数据片段移动到磁盘 `aaa`; -- `TO VOLUME 'bbb'` - 将数据片段移动到卷 `bbb`. 
-- `GROUP BY` - 聚合过期的行 - -使用`WHERE`从句,您可以指定哪些过期的行会被删除或聚合(不适用于移动)。`GROUP BY`表达式必须是表主键的前缀。如果某列不是`GROUP BY`表达式的一部分,也没有在SET从句显示引用,结果行中相应列的值是随机的(就好像使用了`any`函数)。 - -**示例**: - -创建时指定 TTL - -``` sql -CREATE TABLE example_table -( - d DateTime, - a Int -) -ENGINE = MergeTree -PARTITION BY toYYYYMM(d) -ORDER BY d -TTL d + INTERVAL 1 MONTH DELETE, - d + INTERVAL 1 WEEK TO VOLUME 'aaa', - d + INTERVAL 2 WEEK TO DISK 'bbb'; -``` - -修改表的 `TTL` - -``` sql -ALTER TABLE example_table - MODIFY TTL d + INTERVAL 1 DAY; -``` - -创建一张表,设置一个月后数据过期,这些过期的行中日期为星期一的删除: - -``` sql -CREATE TABLE table_with_where -( - d DateTime, - a Int -) -ENGINE = MergeTree -PARTITION BY toYYYYMM(d) -ORDER BY d -TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1; -``` - -创建一张表,设置过期的列会被聚合。列`x`包含每组行中的最大值,`y`为最小值,`d`为可能任意值。 - -``` sql -CREATE TABLE table_for_aggregation -( - d DateTime, - k1 Int, - k2 Int, - x Int, - y Int -) -ENGINE = MergeTree -ORDER BY (k1, k2) -TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); -``` - -**删除数据** - -ClickHouse 在数据片段合并时会删除掉过期的数据。 - -当ClickHouse发现数据过期时, 它将会执行一个计划外的合并。要控制这类合并的频率, 您可以设置 `merge_with_ttl_timeout`。如果该值被设置的太低, 它将引发大量计划外的合并,这可能会消耗大量资源。 - -如果在两次合并的时间间隔中执行 `SELECT` 查询, 则可能会得到过期的数据。为了避免这种情况,可以在 `SELECT` 之前使用 [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) 。 - -## 使用多个块设备进行数据存储 {#table_engine-mergetree-multiple-volumes} - -### 介绍 {#introduction} - -MergeTree 系列表引擎可以将数据存储在多个块设备上。这对某些可以潜在被划分为“冷”“热”的表来说是很有用的。最新数据被定期的查询但只需要很小的空间。相反,详尽的历史数据很少被用到。如果有多块磁盘可用,那么“热”的数据可以放置在快速的磁盘上(比如 NVMe 固态硬盘或内存),“冷”的数据可以放在相对较慢的磁盘上(比如机械硬盘)。 - -数据片段是 `MergeTree` 引擎表的最小可移动单元。属于同一个数据片段的数据被存储在同一块磁盘上。数据片段会在后台自动的在磁盘间移动,也可以通过 [ALTER](../../../sql-reference/statements/alter.md#alter_move-partition) 查询来移动。 - -### 术语 {#terms} - -- 磁盘 — 挂载到文件系统的块设备 -- 默认磁盘 — 在服务器设置中通过 [path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-path) 参数指定的数据存储 -- 卷 — 相同磁盘的顺序列表 (类似于 [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)) -- 存储策略 — 卷的集合及他们之间的数据移动规则 - - 以上名称的信息在Clickhouse中系统表[system.storage_policies](https://clickhouse.com/docs/zh/operations/system-tables/storage_policies/#system_tables-storage_policies)和[system.disks](https://clickhouse.com/docs/zh/operations/system-tables/disks/#system_tables-disks)体现。为了应用存储策略,可以在建表时使用`storage_policy`设置。 - -### 配置 {#table_engine-mergetree-multiple-volumes_configure} - -磁盘、卷和存储策略应当在主配置文件 `config.xml` 或 `config.d` 目录中的独立文件中的 `` 标签内定义。 - -配置结构: - -``` xml - - - - /mnt/fast_ssd/clickhouse/ - - - /mnt/hdd1/clickhouse/ - 10485760 - - - /mnt/hdd2/clickhouse/ - 10485760 - - - ... - - - ... - -``` - -标签: - -- `` — 磁盘名,名称必须与其他磁盘不同. -- `path` — 服务器将用来存储数据 (`data` 和 `shadow` 目录) 的路径, 应当以 ‘/’ 结尾. -- `keep_free_space_bytes` — 需要保留的剩余磁盘空间. - -磁盘定义的顺序无关紧要。 - -存储策略配置: - -``` xml - - ... - - - - - disk_name_from_disks_configuration - 1073741824 - - - - - - - 0.2 - - - - - - - - ... - -``` - -标签: - -- `policy_name_N` — 策略名称,不能重复。 -- `volume_name_N` — 卷名称,不能重复。 -- `disk` — 卷中的磁盘。 -- `max_data_part_size_bytes` — 卷中的磁盘可以存储的数据片段的最大大小。 -- `move_factor` — 当可用空间少于这个因子时,数据将自动的向下一个卷(如果有的话)移动 (默认值为 0.1)。 -- `prefer_not_to_merge` - 禁止在这个卷中进行数据合并。该选项启用时,对该卷的数据不能进行合并。这个选项主要用于慢速磁盘。 - -配置示例: - -``` xml - - ... - - - - - disk1 - disk2 - - - - - - - - fast_ssd - 1073741824 - - - disk1 - - - 0.2 - - - - -
- jbod1 -
- - external - true - -
-
-
- ... -
-``` - -在给出的例子中, `hdd_in_order` 策略实现了 [循环制](https://zh.wikipedia.org/wiki/循环制) 方法。因此这个策略只定义了一个卷(`single`),数据片段会以循环的顺序全部存储到它的磁盘上。当有多个类似的磁盘挂载到系统上,但没有配置 RAID 时,这种策略非常有用。请注意一个每个独立的磁盘驱动都并不可靠,您可能需要用3份或更多的复制份数来补偿它。 - -如果在系统中有不同类型的磁盘可用,可以使用 `moving_from_ssd_to_hdd`。`hot` 卷由 SSD 磁盘(`fast_ssd`)组成,这个卷上可以存储的数据片段的最大大小为 1GB。所有大于 1GB 的数据片段都会被直接存储到 `cold` 卷上,`cold` 卷包含一个名为 `disk1` 的 HDD 磁盘。 -同样,一旦 `fast_ssd` 被填充超过 80%,数据会通过后台进程向 `disk1` 进行转移。 - -存储策略中卷的枚举顺序是很重要的。因为当一个卷被充满时,数据会向下一个卷转移。磁盘的枚举顺序同样重要,因为数据是依次存储在磁盘上的。 - -在创建表时,可以应用存储策略: - -``` sql -CREATE TABLE table_with_non_default_policy ( - EventDate Date, - OrderID UInt64, - BannerID UInt64, - SearchPhrase String -) ENGINE = MergeTree -ORDER BY (OrderID, BannerID) -PARTITION BY toYYYYMM(EventDate) -SETTINGS storage_policy = 'moving_from_ssd_to_hdd' -``` - -`default` 存储策略意味着只使用一个卷,这个卷只包含一个在 `` 中定义的磁盘。您可以使用[ALTER TABLE ... MODIFY SETTING]来修改存储策略,新的存储策略应该包含所有以前的磁盘和卷,并使用相同的名称。 - -可以通过 [background_move_pool_size](../../../operations/server-configuration-parameters/settings.md#background_move_pool_size) 设置调整执行后台任务的线程数。 - -### 详细说明 {#details} - -对于 `MergeTree` 表,数据通过以下不同的方式写入到磁盘当中: - -- 插入(`INSERT`查询) -- 后台合并和[数据变异](../../../sql-reference/statements/alter.md#alter-mutations) -- 从另一个副本下载 -- [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区 - -除了数据变异和冻结分区以外的情况下,数据按照以下逻辑存储到卷或磁盘上: - -1. 首个卷(按定义顺序)拥有足够的磁盘空间存储数据片段(`unreserved_space > current_part_size`)并且允许存储给定数据片段的大小(`max_data_part_size_bytes > current_part_size`) -2. 在这个数据卷内,紧挨着先前存储数据的那块磁盘之后的磁盘,拥有比数据片段大的剩余空间。(`unreserved_space - keep_free_space_bytes > current_part_size`) - -更进一步,数据变异和分区冻结使用的是 [硬链接](https://en.wikipedia.org/wiki/Hard_link)。不同磁盘之间的硬链接是不支持的,所以在这种情况下数据片段都会被存储到原来的那一块磁盘上。 - -在后台,数据片段基于剩余空间(`move_factor`参数)根据卷在配置文件中定义的顺序进行转移。数据永远不会从最后一个移出也不会从第一个移入。可以通过系统表 [system.part_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (字段 `type = MOVE_PART`) 和 [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (字段 `path` 和 `disk`) 来监控后台的移动情况。具体细节可以通过服务器日志查看。 - -用户可以通过 [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition) 强制移动一个数据片段或分区到另外一个卷,所有后台移动的限制都会被考虑在内。这个查询会自行启动,无需等待后台操作完成。如果没有足够的可用空间或任何必须条件没有被满足,用户会收到报错信息。 - -数据移动不会妨碍到数据复制。也就是说,同一张表的不同副本可以指定不同的存储策略。 - -在后台合并和数据变异之后,旧的数据片段会在一定时间后被移除 (`old_parts_lifetime`)。在这期间,他们不能被移动到其他的卷或磁盘。也就是说,直到数据片段被完全移除,它们仍然会被磁盘占用空间计算在内。 - -## 使用S3进行数据存储 {#using-s3-data-storage} - -`MergeTree`系列表引擎允许使用[S3](https://aws.amazon.com/s3/)存储数据,需要修改磁盘类型为`S3`。 - -示例配置: - -``` xml - - ... - - - s3 - https://storage.yandexcloud.net/my-bucket/root-path/ - your_access_key_id - your_secret_access_key - - your_base64_encoded_customer_key - - http://proxy1 - http://proxy2 - - 10000 - 5000 - 10 - 4 - 1000 - /var/lib/clickhouse/disks/s3/ - false - - - ... 
- -``` - -必须的参数: - -- `endpoint` - S3的结点URL,以`path`或`virtual hosted`[格式](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html)书写。 -- `access_key_id` - S3的Access Key ID。 -- `secret_access_key` - S3的Secret Access Key。 - -可选参数: - -- `region` - S3的区域名称 -- `use_environment_credentials` - 从环境变量AWS_ACCESS_KEY_ID、AWS_SECRET_ACCESS_KEY和AWS_SESSION_TOKEN中读取认证参数。默认值为`false`。 -- `use_insecure_imds_request` - 如果设置为`true`,S3客户端在认证时会使用不安全的IMDS请求。默认值为`false`。 -- `proxy` - 访问S3结点URL时代理设置。每一个`uri`项的值都应该是合法的代理URL。 -- `connect_timeout_ms` - Socket连接超时时间,默认值为`10000`,即10秒。 -- `request_timeout_ms` - 请求超时时间,默认值为`5000`,即5秒。 -- `retry_attempts` - 请求失败后的重试次数,默认值为10。 -- `single_read_retries` - 读过程中连接丢失后重试次数,默认值为4。 -- `min_bytes_for_seek` - 使用查找操作,而不是顺序读操作的最小字节数,默认值为1000。 -- `metadata_path` - 本地存放S3元数据文件的路径,默认值为`/var/lib/clickhouse/disks//` -- `skip_access_check` - 如果为`true`,Clickhouse启动时不检查磁盘是否可用。默认为`false`。 -- `server_side_encryption_customer_key_base64` - 如果指定该项的值,请求时会加上为了访问SSE-C加密数据而必须的头信息。 - -S3磁盘也可以设置冷热存储: -```xml - - ... - - - s3 - https://storage.yandexcloud.net/my-bucket/root-path/ - your_access_key_id - your_secret_access_key - - - - - -
- s3 -
-
-
- - -
- default -
- - s3 - -
- 0.2 -
-
- ... -
-``` - -指定了`cold`选项后,本地磁盘剩余空间如果小于`move_factor * disk_size`,或有TTL设置时,数据就会定时迁移至S3了。 - -## 虚拟列 {#virtual-columns} - -- `_part` - 分区名称。 -- `_part_index` - 作为请求的结果,按顺序排列的分区数。 -- `_partition_id` — 分区名称。 -- `_part_uuid` - 唯一部分标识符(如果 MergeTree 设置`assign_part_uuids` 已启用)。 -- `_partition_value` — `partition by` 表达式的值(元组)。 -- `_sample_factor` - 采样因子(来自请求)。 +--- +slug: /zh/engines/table-engines/mergetree-family/mergetree +--- +# MergeTree {#table_engines-mergetree} + +Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及该系列(`*MergeTree`)中的其他引擎。 + +`MergeTree` 系列的引擎被设计用于插入极大量的数据到一张表当中。数据可以以数据片段的形式一个接着一个的快速写入,数据片段在后台按照一定的规则进行合并。相比在插入时不断修改(重写)已存储的数据,这种策略会高效很多。 + +主要特点: + +- 存储的数据按主键排序。 + + 这使得您能够创建一个小型的稀疏索引来加快数据检索。 + +- 如果指定了 [分区键](custom-partitioning-key.md) 的话,可以使用分区。 + + 在相同数据集和相同结果集的情况下 ClickHouse 中某些带分区的操作会比普通操作更快。查询中指定了分区键时 ClickHouse 会自动截取分区数据。这也有效增加了查询性能。 + +- 支持数据副本。 + + `ReplicatedMergeTree` 系列的表提供了数据副本功能。更多信息,请参阅 [数据副本](replication.md) 一节。 + +- 支持数据采样。 + + 需要的话,您可以给表设置一个采样方法。 + +:::info +[合并](../special/merge.md#merge) 引擎并不属于 `*MergeTree` 系列。 +::: + +## 建表 {#table_engine-mergetree-creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], + ... + INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1, + INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2 +) ENGINE = MergeTree() +ORDER BY expr +[PARTITION BY expr] +[PRIMARY KEY expr] +[SAMPLE BY expr] +[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...] +[SETTINGS name=value, ...] +``` + +对于以上参数的描述,可参考 [CREATE 语句 的描述](../../../engines/table-engines/mergetree-family/mergetree.md) 。 + + + +**子句** + +- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`. `MergeTree` 引擎没有参数。 + +- `ORDER BY` — 排序键。 + + 可以是一组列的元组或任意的表达式。 例如: `ORDER BY (CounterID, EventDate)` 。 + + 如果没有使用 `PRIMARY KEY` 显式指定的主键,ClickHouse 会使用排序键作为主键。 + + 如果不需要排序,可以使用 `ORDER BY tuple()`. 
参考 [选择主键](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/#selecting-the-primary-key) + +- `PARTITION BY` — [分区键](custom-partitioning-key.md) ,可选项。 + + 大多数情况下,不需要使用分区键。即使需要使用,也不需要使用比月更细粒度的分区键。分区不会加快查询(这与 ORDER BY 表达式不同)。永远也别使用过细粒度的分区键。不要使用客户端指定分区标识符或分区字段名称来对数据进行分区(而是将分区字段标识或名称作为 ORDER BY 表达式的第一列来指定分区)。 + + 要按月分区,可以使用表达式 `toYYYYMM(date_column)` ,这里的 `date_column` 是一个 [Date](../../../engines/table-engines/mergetree-family/mergetree.md) 类型的列。分区名的格式会是 `"YYYYMM"` 。 + +- `PRIMARY KEY` - 如果要 [选择与排序键不同的主键](#choosing-a-primary-key-that-differs-from-the-sorting-key),在这里指定,可选项。 + + 默认情况下主键跟排序键(由 `ORDER BY` 子句指定)相同。 + 因此,大部分情况下不需要再专门指定一个 `PRIMARY KEY` 子句。 + +- `SAMPLE BY` - 用于抽样的表达式,可选项。 + + 如果要用抽样表达式,主键中必须包含这个表达式。例如: + `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))` 。 + +- `TTL` - 指定行存储的持续时间并定义数据片段在硬盘和卷上的移动逻辑的规则列表,可选项。 + + 表达式中必须存在至少一个 `Date` 或 `DateTime` 类型的列,比如: + + `TTL date + INTERVAl 1 DAY` + + 规则的类型 `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'`指定了当满足条件(到达指定时间)时所要执行的动作:移除过期的行,还是将数据片段(如果数据片段中的所有行都满足表达式的话)移动到指定的磁盘(`TO DISK 'xxx'`) 或 卷(`TO VOLUME 'xxx'`)。默认的规则是移除(`DELETE`)。可以在列表中指定多个规则,但最多只能有一个`DELETE`的规则。 + + 更多细节,请查看 [表和列的 TTL](#table_engine-mergetree-ttl) + +- `SETTINGS` — 控制 `MergeTree` 行为的额外参数,可选项: + + - `index_granularity` — 索引粒度。索引中相邻的『标记』间的数据行数。默认值8192 。参考[数据存储](#mergetree-data-storage)。 + - `index_granularity_bytes` — 索引粒度,以字节为单位,默认值: 10Mb。如果想要仅按数据行数限制索引粒度, 请设置为0(不建议)。 + - `min_index_granularity_bytes` - 允许的最小数据粒度,默认值:1024b。该选项用于防止误操作,添加了一个非常低索引粒度的表。参考[数据存储](#mergetree-data-storage) + - `enable_mixed_granularity_parts` — 是否启用通过 `index_granularity_bytes` 控制索引粒度的大小。在19.11版本之前, 只有 `index_granularity` 配置能够用于限制索引粒度的大小。当从具有很大的行(几十上百兆字节)的表中查询数据时候,`index_granularity_bytes` 配置能够提升ClickHouse的性能。如果您的表里有很大的行,可以开启这项配置来提升`SELECT` 查询的性能。 + - `use_minimalistic_part_header_in_zookeeper` — ZooKeeper中数据片段存储方式 。如果`use_minimalistic_part_header_in_zookeeper=1` ,ZooKeeper 会存储更少的数据。更多信息参考[服务配置参数]([Server Settings | ClickHouse Documentation](https://clickhouse.com/docs/zh/operations/server-configuration-parameters/settings/))这章中的 [设置描述](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) 。 + - `min_merge_bytes_to_use_direct_io` — 使用直接 I/O 来操作磁盘的合并操作时要求的最小数据量。合并数据片段时,ClickHouse 会计算要被合并的所有数据的总存储空间。如果大小超过了 `min_merge_bytes_to_use_direct_io` 设置的字节数,则 ClickHouse 将使用直接 I/O 接口(`O_DIRECT` 选项)对磁盘读写。如果设置 `min_merge_bytes_to_use_direct_io = 0` ,则会禁用直接 I/O。默认值:`10 * 1024 * 1024 * 1024` 字节。 + + - `merge_with_ttl_timeout` — TTL合并频率的最小间隔时间,单位:秒。默认值: 86400 (1 天)。 + - `write_final_mark` — 是否启用在数据片段尾部写入最终索引标记。默认值: 1(不要关闭)。 + - `merge_max_block_size` — 在块中进行合并操作时的最大行数限制。默认值:8192 + - `storage_policy` — 存储策略。 参见 [使用具有多个块的设备进行数据存储](#table_engine-mergetree-multiple-volumes). 
+ - `min_bytes_for_wide_part`,`min_rows_for_wide_part` 在数据片段中可以使用`Wide`格式进行存储的最小字节数/行数。您可以不设置、只设置一个,或全都设置。参考:[数据存储](#mergetree-data-storage) + - `max_parts_in_total` - 所有分区中最大块的数量(意义不明) + - `max_compress_block_size` - 在数据压缩写入表前,未压缩数据块的最大大小。您可以在全局设置中设置该值(参见[max_compress_block_size](https://clickhouse.com/docs/zh/operations/settings/settings/#max-compress-block-size))。建表时指定该值会覆盖全局设置。 + - `min_compress_block_size` - 在数据压缩写入表前,未压缩数据块的最小大小。您可以在全局设置中设置该值(参见[min_compress_block_size](https://clickhouse.com/docs/zh/operations/settings/settings/#min-compress-block-size))。建表时指定该值会覆盖全局设置。 + - `max_partitions_to_read` - 一次查询中可访问的分区最大数。您可以在全局设置中设置该值(参见[max_partitions_to_read](https://clickhouse.com/docs/zh/operations/settings/settings/#max_partitions_to_read))。 + +**示例配置** + +``` sql +ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192 +``` + +在这个例子中,我们设置了按月进行分区。 + +同时我们设置了一个按用户 ID 哈希的抽样表达式。这使得您可以对该表中每个 `CounterID` 和 `EventDate` 的数据伪随机分布。如果您在查询时指定了 [SAMPLE](../../../engines/table-engines/mergetree-family/mergetree.md#select-sample-clause) 子句。 ClickHouse会返回对于用户子集的一个均匀的伪随机数据采样。 + +`index_granularity` 可省略因为 8192 是默认设置 。 + +
+已弃用的建表方法 + +:::attention "注意" +不要在新版项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +::: + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE [=] MergeTree(date-column [, sampling_expression], (primary, key), index_granularity) +``` + +**MergeTree() 参数** + +- `date-column` — 类型为 [日期](../../../engines/table-engines/mergetree-family/mergetree.md) 的列名。ClickHouse 会自动依据这个列按月创建分区。分区名格式为 `"YYYYMM"` 。 +- `sampling_expression` — 采样表达式。 +- `(primary, key)` — 主键。类型 — [元组()](../../../engines/table-engines/mergetree-family/mergetree.md) +- `index_granularity` — 索引粒度。即索引中相邻『标记』间的数据行数。设为 8192 可以适用大部分场景。 + +**示例** + + MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID)), 8192) + +对于主要的配置方法,这里 `MergeTree` 引擎跟前面的例子一样,可以以同样的方式配置。 +
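+
+下面给出一个示意性的改写草稿,帮助把已弃用的写法迁移到当前推荐的语法:表名与列类型(`UInt32` 等)仅为演示用的假设,参数沿用上文示例中的 `MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID)), 8192)`,并非权威实现。
+
+``` sql
+CREATE TABLE example_rewritten                        -- 表名为假设的演示名称
+(
+    EventDate Date,
+    CounterID UInt32,                                 -- 列类型仅为示意
+    UserID UInt32
+) ENGINE = MergeTree()
+PARTITION BY toYYYYMM(EventDate)                      -- 对应已弃用写法中的 date-column(按月分区)
+ORDER BY (CounterID, EventDate, intHash32(UserID))    -- 对应 (primary, key)
+SAMPLE BY intHash32(UserID)                           -- 对应 sampling_expression
+SETTINGS index_granularity = 8192;                    -- 对应 index_granularity
+```
+
+这组参数与上文 **示例配置** 中给出的写法一致,仅表达形式不同。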
+ +## 数据存储 {#mergetree-data-storage} + +表由按主键排序的数据片段(DATA PART)组成。 + +当数据被插入到表中时,会创建多个数据片段并按主键的字典序排序。例如,主键是 `(CounterID, Date)` 时,片段中数据首先按 `CounterID` 排序,具有相同 `CounterID` 的部分按 `Date` 排序。 + +不同分区的数据会被分成不同的片段,ClickHouse 在后台合并数据片段以便更高效存储。不同分区的数据片段不会进行合并。合并机制并不保证具有相同主键的行全都合并到同一个数据片段中。 + +数据片段可以以 `Wide` 或 `Compact` 格式存储。在 `Wide` 格式下,每一列都会在文件系统中存储为单独的文件,在 `Compact` 格式下所有列都存储在一个文件中。`Compact` 格式可以提高插入量少插入频率频繁时的性能。 + +数据存储格式由 `min_bytes_for_wide_part` 和 `min_rows_for_wide_part` 表引擎参数控制。如果数据片段中的字节数或行数少于相应的设置值,数据片段会以 `Compact` 格式存储,否则会以 `Wide` 格式存储。 + +每个数据片段被逻辑的分割成颗粒(granules)。颗粒是 ClickHouse 中进行数据查询时的最小不可分割数据集。ClickHouse 不会对行或值进行拆分,所以每个颗粒总是包含整数个行。每个颗粒的第一行通过该行的主键值进行标记, +ClickHouse 会为每个数据片段创建一个索引文件来存储这些标记。对于每列,无论它是否包含在主键当中,ClickHouse 都会存储类似标记。这些标记让您可以在列文件中直接找到数据。 + +颗粒的大小通过表引擎参数 `index_granularity` 和 `index_granularity_bytes` 控制。颗粒的行数的在 `[1, index_granularity]` 范围中,这取决于行的大小。如果单行的大小超过了 `index_granularity_bytes` 设置的值,那么一个颗粒的大小会超过 `index_granularity_bytes`。在这种情况下,颗粒的大小等于该行的大小。 + +## 主键和索引在查询中的表现 {#primary-keys-and-indexes-in-queries} + +我们以 `(CounterID, Date)` 以主键。排序好的索引的图示会是下面这样: + +``` text + 全部数据 : [-------------------------------------------------------------------------] + CounterID: [aaaaaaaaaaaaaaaaaabbbbcdeeeeeeeeeeeeefgggggggghhhhhhhhhiiiiiiiiikllllllll] + Date: [1111111222222233331233211111222222333211111112122222223111112223311122333] + 标记: | | | | | | | | | | | + a,1 a,2 a,3 b,3 e,2 e,3 g,1 h,2 i,1 i,3 l,3 + 标记号: 0 1 2 3 4 5 6 7 8 9 10 +``` + +如果指定查询如下: + +- `CounterID in ('a', 'h')`,服务器会读取标记号在 `[0, 3)` 和 `[6, 8)` 区间中的数据。 +- `CounterID IN ('a', 'h') AND Date = 3`,服务器会读取标记号在 `[1, 3)` 和 `[7, 8)` 区间中的数据。 +- `Date = 3`,服务器会读取标记号在 `[1, 10]` 区间中的数据。 + +上面例子可以看出使用索引通常会比全表描述要高效。 + +稀疏索引会引起额外的数据读取。当读取主键单个区间范围的数据时,每个数据块中最多会多读 `index_granularity * 2` 行额外的数据。 + +稀疏索引使得您可以处理极大量的行,因为大多数情况下,这些索引常驻于内存。 + +ClickHouse 不要求主键唯一,所以您可以插入多条具有相同主键的行。 + +您可以在`PRIMARY KEY`与`ORDER BY`条件中使用`可为空的`类型的表达式,但强烈建议不要这么做。为了启用这项功能,请打开[allow_nullable_key](../../../operations/settings/index.md#allow-nullable-key),[NULLS_LAST](../../../sql-reference/statements/select/order-by.md#sorting-of-special-values)规则也适用于`ORDER BY`条件中有NULL值的情况下。 + +### 主键的选择 {#zhu-jian-de-xuan-ze} + +主键中列的数量并没有明确的限制。依据数据结构,您可以在主键包含多些或少些列。这样可以: + + - 改善索引的性能。 + + - 如果当前主键是 `(a, b)` ,在下列情况下添加另一个 `c` 列会提升性能: + + - 查询会使用 `c` 列作为条件 + - 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让您的查询略过很长的数据范围。 + + - 改善数据压缩。 + + ClickHouse 以主键排序片段数据,所以,数据的一致性越高,压缩越好。 + + - 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。 + + 在这种情况下,指定与主键不同的 *排序键* 也是有意义的。 + +长的主键会对插入性能和内存消耗有负面影响,但主键中额外的列并不影响 `SELECT` 查询的性能。 + +可以使用 `ORDER BY tuple()` 语法创建没有主键的表。在这种情况下 ClickHouse 根据数据插入的顺序存储。如果在使用 `INSERT ... 
SELECT` 时希望保持数据的排序,请设置 [max_insert_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads)。 + +想要根据初始顺序进行数据查询,使用 [单线程查询](../../../operations/settings/settings.md#settings-max_threads) + +### 选择与排序键不同的主键 {#choosing-a-primary-key-that-differs-from-the-sorting-key} + +Clickhouse可以做到指定一个跟排序键不一样的主键,此时排序键用于在数据片段中进行排序,主键用于在索引文件中进行标记的写入。这种情况下,主键表达式元组必须是排序键表达式元组的前缀(即主键为(a,b),排序列必须为(a,b,******))。 + +当使用 [SummingMergeTree](summingmergetree.md) 和 [AggregatingMergeTree](aggregatingmergetree.md) 引擎时,这个特性非常有用。通常在使用这类引擎时,表里的列分两种:*维度* 和 *度量* 。典型的查询会通过任意的 `GROUP BY` 对度量列进行聚合并通过维度列进行过滤。由于 SummingMergeTree 和 AggregatingMergeTree 会对排序键相同的行进行聚合,所以把所有的维度放进排序键是很自然的做法。但这将导致排序键中包含大量的列,并且排序键会伴随着新添加的维度不断的更新。 + +在这种情况下合理的做法是,只保留少量的列在主键当中用于提升扫描效率,将维度列添加到排序键中。 + +对排序键进行 [ALTER](../../../sql-reference/statements/alter.md) 是轻量级的操作,因为当一个新列同时被加入到表里和排序键里时,已存在的数据片段并不需要修改。由于旧的排序键是新排序键的前缀,并且新添加的列中没有数据,因此在表修改时的数据对于新旧的排序键来说都是有序的。 + +### 索引和分区在查询中的应用 {#use-of-indexes-and-partitions-in-queries} + +对于 `SELECT` 查询,ClickHouse 分析是否可以使用索引。如果 `WHERE/PREWHERE` 子句具有下面这些表达式(作为完整WHERE条件的一部分或全部)则可以使用索引:进行相等/不相等的比较;对主键列或分区列进行`IN`运算、有固定前缀的`LIKE`运算(如name like 'test%')、函数运算(部分函数适用),还有对上述表达式进行逻辑运算。 + + +因此,在索引键的一个或多个区间上快速地执行查询是可能的。下面例子中,指定标签;指定标签和日期范围;指定标签和日期;指定多个标签和日期范围等执行查询,都会非常快。 + +当引擎配置如下时: + +``` sql + ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate) SETTINGS index_granularity=8192 +``` + +这种情况下,这些查询: + +``` sql +SELECT count() FROM table WHERE EventDate = toDate(now()) AND CounterID = 34 +SELECT count() FROM table WHERE EventDate = toDate(now()) AND (CounterID = 34 OR CounterID = 42) +SELECT count() FROM table WHERE ((EventDate >= toDate('2014-01-01') AND EventDate <= toDate('2014-01-31')) OR EventDate = toDate('2014-05-01')) AND CounterID IN (101500, 731962, 160656) AND (CounterID = 101500 OR EventDate != toDate('2014-05-01')) +``` + +ClickHouse 会依据主键索引剪掉不符合的数据,依据按月分区的分区键剪掉那些不包含符合数据的分区。 + +上文的查询显示,即使索引用于复杂表达式,因为读表操作经过优化,所以使用索引不会比完整扫描慢。 + +下面这个例子中,不会使用索引。 + +``` sql +SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' +``` + +要检查 ClickHouse 执行一个查询时能否使用索引,可设置 [force_index_by_date](../../../operations/settings/settings.md#settings-force_index_by_date) 和 [force_primary_key](../../../operations/settings/settings.md) 。 + +使用按月分区的分区列允许只读取包含适当日期区间的数据块,这种情况下,数据块会包含很多天(最多整月)的数据。在块中,数据按主键排序,主键第一列可能不包含日期。因此,仅使用日期而没有用主键字段作为条件的查询将会导致需要读取超过这个指定日期以外的数据。 + +### 部分单调主键的使用 + +考虑这样的场景,比如一个月中的天数。它们在一个月的范围内形成一个[单调序列](https://zh.wikipedia.org/wiki/单调函数) ,但如果扩展到更大的时间范围它们就不再单调了。这就是一个部分单调序列。如果用户使用部分单调的主键创建表,ClickHouse同样会创建一个稀疏索引。当用户从这类表中查询数据时,ClickHouse 会对查询条件进行分析。如果用户希望获取两个索引标记之间的数据并且这两个标记在一个月以内,ClickHouse 可以在这种特殊情况下使用到索引,因为它可以计算出查询参数与索引标记之间的距离。 + +如果查询参数范围内的主键不是单调序列,那么 ClickHouse 无法使用索引。在这种情况下,ClickHouse 会进行全表扫描。 + +ClickHouse 在任何主键代表一个部分单调序列的情况下都会使用这个逻辑。 + +### 跳数索引 {#tiao-shu-suo-yin-fen-duan-hui-zong-suo-yin-shi-yan-xing-de} + +此索引在 `CREATE` 语句的列部分里定义。 + +``` sql +INDEX index_name expr TYPE type(...) GRANULARITY granularity_value +``` + +`*MergeTree` 系列的表可以指定跳数索引。 +跳数索引是指数据片段按照粒度(建表时指定的`index_granularity`)分割成小块后,将上述SQL的granularity_value数量的小块组合成一个大的块,对这些大块写入索引信息,这样有助于使用`where`筛选时跳过大量不必要的数据,减少`SELECT`需要读取的数据量。 + +**示例** + +``` sql +CREATE TABLE table_name +( + u64 UInt64, + i32 Int32, + s String, + ... + INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3, + INDEX b (u64 * length(s)) TYPE set(1000) GRANULARITY 4 +) ENGINE = MergeTree() +... 
+``` + +上例中的索引能让 ClickHouse 执行下面这些查询时减少读取数据量。 + +``` sql +SELECT count() FROM table WHERE s < 'z' +SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 +``` + +#### 可用的索引类型 {#table_engine-mergetree-data_skipping-indexes} + +- `minmax` + 存储指定表达式的极值(如果表达式是 `tuple` ,则存储 `tuple` 中每个元素的极值),这些信息用于跳过数据块,类似主键。 + +- `set(max_rows)` + 存储指定表达式的不重复值(不超过 `max_rows` 个,`max_rows=0` 则表示『无限制』)。这些信息可用于检查数据块是否满足 `WHERE` 条件。 + +- `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` + 存储一个包含数据块中所有 n元短语(ngram) 的 [布隆过滤器](https://en.wikipedia.org/wiki/Bloom_filter) 。只可用在字符串上。 + 可用于优化 `equals` , `like` 和 `in` 表达式的性能。 + - `n` – 短语长度。 + - `size_of_bloom_filter_in_bytes` – 布隆过滤器大小,字节为单位。(因为压缩得好,可以指定比较大的值,如 256 或 512)。 + - `number_of_hash_functions` – 布隆过滤器中使用的哈希函数的个数。 + - `random_seed` – 哈希函数的随机种子。 + +- `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` + 跟 `ngrambf_v1` 类似,但是存储的是token而不是ngrams。Token是由非字母数字的符号分割的序列。 + +- `bloom_filter(bloom_filter([false_positive])` – 为指定的列存储布隆过滤器 + + 可选参数`false_positive`用来指定从布隆过滤器收到错误响应的几率。取值范围是 (0,1),默认值:0.025 + + 支持的数据类型:`Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`。 + + 以下函数会用到这个索引: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md) + +``` sql +INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 +INDEX sample_index2 (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4 +INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4 +``` + +#### 函数支持 {#functions-support} + +WHERE 子句中的条件可以包含对某列数据进行运算的函数表达式,如果列是索引的一部分,ClickHouse会在执行函数时尝试使用索引。不同的函数对索引的支持是不同的。 + +`set` 索引会对所有函数生效,其他索引对函数的生效情况见下表 + +| 函数 (操作符) / 索引 | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | +| ------------------------------------------------------------ | ----------- | ------ | ---------- | ---------- | ------------ | +| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#equals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notEquals(!=, <>)](../../../sql-reference/functions/comparison-functions.md#notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | +| [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | +| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | +| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [less (\<)](../../../sql-reference/functions/comparison-functions.md#less) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#greater) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [greaterOrEquals 
(\>=)](../../../sql-reference/functions/comparison-functions.md#greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [has](../../../sql-reference/functions/array-functions.md#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ | ✔ | +| [hasAny](../../../sql-reference/functions/array-functions.md#function-hasAny) | ✗ | ✗ | ✔ | ✔ | ✔ | ✗ | +| [hasAll](../../../sql-reference/functions/array-functions.md#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | + +常量参数小于 ngram 大小的函数不能使用 `ngrambf_v1` 进行查询优化。 + +:::note +布隆过滤器可能会包含不符合条件的匹配,所以 `ngrambf_v1`, `tokenbf_v1` 和 `bloom_filter` 索引不能用于结果返回为假的函数,例如: + +- 可以用来优化的场景 + - `s LIKE '%test%'` + - `NOT s NOT LIKE '%test%'` + - `s = 1` + - `NOT s != 1` + - `startsWith(s, 'test')` +- 不能用来优化的场景 + - `NOT s LIKE '%test%'` + - `s NOT LIKE '%test%'` + - `NOT s = 1` + - `s != 1` + - `NOT startsWith(s, 'test')` +::: + +## 并发数据访问 {#concurrent-data-access} + +对于表的并发访问,我们使用多版本机制。换言之,当一张表同时被读和更新时,数据从当前查询到的一组片段中读取。没有冗长的的锁。插入不会阻碍读取。 + +对表的读操作是自动并行的。 + +## 列和表的 TTL {#table_engine-mergetree-ttl} + +TTL用于设置值的生命周期,它既可以为整张表设置,也可以为每个列字段单独设置。表级别的 TTL 还会指定数据在磁盘和卷上自动转移的逻辑。 + +TTL 表达式的计算结果必须是 [日期](../../../engines/table-engines/mergetree-family/mergetree.md) 或 [日期时间](../../../engines/table-engines/mergetree-family/mergetree.md) 类型的字段。 + +示例: + +``` sql +TTL time_column +TTL time_column + interval +``` + +要定义`interval`, 需要使用 [时间间隔](../../../engines/table-engines/mergetree-family/mergetree.md#operators-datetime) 操作符。 + +``` sql +TTL date_time + INTERVAL 1 MONTH +TTL date_time + INTERVAL 15 HOUR +``` + +### 列 TTL {#mergetree-column-ttl} + +当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期,则ClickHouse 会从文件系统中的数据片段中删除此列。 + +`TTL`子句不能被用于主键字段。 + +**示例:** + +创建表时指定 `TTL` + +``` sql +CREATE TABLE example_table +( + d DateTime, + a Int TTL d + INTERVAL 1 MONTH, + b Int TTL d + INTERVAL 1 MONTH, + c String +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(d) +ORDER BY d; +``` + +为表中已存在的列字段添加 `TTL` + +``` sql +ALTER TABLE example_table + MODIFY COLUMN + c String TTL d + INTERVAL 1 DAY; +``` + +修改列字段的 `TTL` + +``` sql +ALTER TABLE example_table + MODIFY COLUMN + c String TTL d + INTERVAL 1 MONTH; +``` + +### 表 TTL {#mergetree-table-ttl} + +表可以设置一个用于移除过期行的表达式,以及多个用于在磁盘或卷上自动转移数据片段的表达式。当表中的行过期时,ClickHouse 会删除所有对应的行。对于数据片段的转移特性,必须所有的行都满足转移条件。 + +``` sql +TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ... + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] + +``` + +TTL 规则的类型紧跟在每个 TTL 表达式后面,它会影响满足表达式时(到达指定时间时)应当执行的操作: + +- `DELETE` - 删除过期的行(默认操作); +- `TO DISK 'aaa'` - 将数据片段移动到磁盘 `aaa`; +- `TO VOLUME 'bbb'` - 将数据片段移动到卷 `bbb`. 
+- `GROUP BY` - 聚合过期的行 + +使用`WHERE`从句,您可以指定哪些过期的行会被删除或聚合(不适用于移动)。`GROUP BY`表达式必须是表主键的前缀。如果某列不是`GROUP BY`表达式的一部分,也没有在SET从句显示引用,结果行中相应列的值是随机的(就好像使用了`any`函数)。 + +**示例**: + +创建时指定 TTL + +``` sql +CREATE TABLE example_table +( + d DateTime, + a Int +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(d) +ORDER BY d +TTL d + INTERVAL 1 MONTH DELETE, + d + INTERVAL 1 WEEK TO VOLUME 'aaa', + d + INTERVAL 2 WEEK TO DISK 'bbb'; +``` + +修改表的 `TTL` + +``` sql +ALTER TABLE example_table + MODIFY TTL d + INTERVAL 1 DAY; +``` + +创建一张表,设置一个月后数据过期,这些过期的行中日期为星期一的删除: + +``` sql +CREATE TABLE table_with_where +( + d DateTime, + a Int +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(d) +ORDER BY d +TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1; +``` + +创建一张表,设置过期的列会被聚合。列`x`包含每组行中的最大值,`y`为最小值,`d`为可能任意值。 + +``` sql +CREATE TABLE table_for_aggregation +( + d DateTime, + k1 Int, + k2 Int, + x Int, + y Int +) +ENGINE = MergeTree +ORDER BY (k1, k2) +TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); +``` + +**删除数据** + +ClickHouse 在数据片段合并时会删除掉过期的数据。 + +当ClickHouse发现数据过期时, 它将会执行一个计划外的合并。要控制这类合并的频率, 您可以设置 `merge_with_ttl_timeout`。如果该值被设置的太低, 它将引发大量计划外的合并,这可能会消耗大量资源。 + +如果在两次合并的时间间隔中执行 `SELECT` 查询, 则可能会得到过期的数据。为了避免这种情况,可以在 `SELECT` 之前使用 [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) 。 + +## 使用多个块设备进行数据存储 {#table_engine-mergetree-multiple-volumes} + +### 介绍 {#introduction} + +MergeTree 系列表引擎可以将数据存储在多个块设备上。这对某些可以潜在被划分为“冷”“热”的表来说是很有用的。最新数据被定期的查询但只需要很小的空间。相反,详尽的历史数据很少被用到。如果有多块磁盘可用,那么“热”的数据可以放置在快速的磁盘上(比如 NVMe 固态硬盘或内存),“冷”的数据可以放在相对较慢的磁盘上(比如机械硬盘)。 + +数据片段是 `MergeTree` 引擎表的最小可移动单元。属于同一个数据片段的数据被存储在同一块磁盘上。数据片段会在后台自动的在磁盘间移动,也可以通过 [ALTER](../../../sql-reference/statements/alter.md#alter_move-partition) 查询来移动。 + +### 术语 {#terms} + +- 磁盘 — 挂载到文件系统的块设备 +- 默认磁盘 — 在服务器设置中通过 [path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-path) 参数指定的数据存储 +- 卷 — 相同磁盘的顺序列表 (类似于 [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)) +- 存储策略 — 卷的集合及他们之间的数据移动规则 + + 以上名称的信息在Clickhouse中系统表[system.storage_policies](https://clickhouse.com/docs/zh/operations/system-tables/storage_policies/#system_tables-storage_policies)和[system.disks](https://clickhouse.com/docs/zh/operations/system-tables/disks/#system_tables-disks)体现。为了应用存储策略,可以在建表时使用`storage_policy`设置。 + +### 配置 {#table_engine-mergetree-multiple-volumes_configure} + +磁盘、卷和存储策略应当在主配置文件 `config.xml` 或 `config.d` 目录中的独立文件中的 `` 标签内定义。 + +配置结构: + +``` xml + + + + /mnt/fast_ssd/clickhouse/ + + + /mnt/hdd1/clickhouse/ + 10485760 + + + /mnt/hdd2/clickhouse/ + 10485760 + + + ... + + + ... + +``` + +标签: + +- `` — 磁盘名,名称必须与其他磁盘不同. +- `path` — 服务器将用来存储数据 (`data` 和 `shadow` 目录) 的路径, 应当以 ‘/’ 结尾. +- `keep_free_space_bytes` — 需要保留的剩余磁盘空间. + +磁盘定义的顺序无关紧要。 + +存储策略配置: + +``` xml + + ... + + + + + disk_name_from_disks_configuration + 1073741824 + + + + + + + 0.2 + + + + + + + + ... + +``` + +标签: + +- `policy_name_N` — 策略名称,不能重复。 +- `volume_name_N` — 卷名称,不能重复。 +- `disk` — 卷中的磁盘。 +- `max_data_part_size_bytes` — 卷中的磁盘可以存储的数据片段的最大大小。 +- `move_factor` — 当可用空间少于这个因子时,数据将自动的向下一个卷(如果有的话)移动 (默认值为 0.1)。 + +配置示例: + +``` xml + + ... + + + + + disk1 + disk2 + + + + + + + + fast_ssd + 1073741824 + + + disk1 + + + 0.2 + + + + +
+ jbod1 +
+ + external + +
+
+
+ ... +
+``` + +在给出的例子中, `hdd_in_order` 策略实现了 [循环制](https://zh.wikipedia.org/wiki/循环制) 方法。因此这个策略只定义了一个卷(`single`),数据片段会以循环的顺序全部存储到它的磁盘上。当有多个类似的磁盘挂载到系统上,但没有配置 RAID 时,这种策略非常有用。请注意一个每个独立的磁盘驱动都并不可靠,您可能需要用3份或更多的复制份数来补偿它。 + +如果在系统中有不同类型的磁盘可用,可以使用 `moving_from_ssd_to_hdd`。`hot` 卷由 SSD 磁盘(`fast_ssd`)组成,这个卷上可以存储的数据片段的最大大小为 1GB。所有大于 1GB 的数据片段都会被直接存储到 `cold` 卷上,`cold` 卷包含一个名为 `disk1` 的 HDD 磁盘。 +同样,一旦 `fast_ssd` 被填充超过 80%,数据会通过后台进程向 `disk1` 进行转移。 + +存储策略中卷的枚举顺序是很重要的。因为当一个卷被充满时,数据会向下一个卷转移。磁盘的枚举顺序同样重要,因为数据是依次存储在磁盘上的。 + +在创建表时,可以应用存储策略: + +``` sql +CREATE TABLE table_with_non_default_policy ( + EventDate Date, + OrderID UInt64, + BannerID UInt64, + SearchPhrase String +) ENGINE = MergeTree +ORDER BY (OrderID, BannerID) +PARTITION BY toYYYYMM(EventDate) +SETTINGS storage_policy = 'moving_from_ssd_to_hdd' +``` + +`default` 存储策略意味着只使用一个卷,这个卷只包含一个在 `` 中定义的磁盘。您可以使用[ALTER TABLE ... MODIFY SETTING]来修改存储策略,新的存储策略应该包含所有以前的磁盘和卷,并使用相同的名称。 + +可以通过 [background_move_pool_size](../../../operations/server-configuration-parameters/settings.md#background_move_pool_size) 设置调整执行后台任务的线程数。 + +### 详细说明 {#details} + +对于 `MergeTree` 表,数据通过以下不同的方式写入到磁盘当中: + +- 插入(`INSERT`查询) +- 后台合并和[数据变异](../../../sql-reference/statements/alter.md#alter-mutations) +- 从另一个副本下载 +- [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区 + +除了数据变异和冻结分区以外的情况下,数据按照以下逻辑存储到卷或磁盘上: + +1. 首个卷(按定义顺序)拥有足够的磁盘空间存储数据片段(`unreserved_space > current_part_size`)并且允许存储给定数据片段的大小(`max_data_part_size_bytes > current_part_size`) +2. 在这个数据卷内,紧挨着先前存储数据的那块磁盘之后的磁盘,拥有比数据片段大的剩余空间。(`unreserved_space - keep_free_space_bytes > current_part_size`) + +更进一步,数据变异和分区冻结使用的是 [硬链接](https://en.wikipedia.org/wiki/Hard_link)。不同磁盘之间的硬链接是不支持的,所以在这种情况下数据片段都会被存储到原来的那一块磁盘上。 + +在后台,数据片段基于剩余空间(`move_factor`参数)根据卷在配置文件中定义的顺序进行转移。数据永远不会从最后一个移出也不会从第一个移入。可以通过系统表 [system.part_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (字段 `type = MOVE_PART`) 和 [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (字段 `path` 和 `disk`) 来监控后台的移动情况。具体细节可以通过服务器日志查看。 + +用户可以通过 [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition) 强制移动一个数据片段或分区到另外一个卷,所有后台移动的限制都会被考虑在内。这个查询会自行启动,无需等待后台操作完成。如果没有足够的可用空间或任何必须条件没有被满足,用户会收到报错信息。 + +数据移动不会妨碍到数据复制。也就是说,同一张表的不同副本可以指定不同的存储策略。 + +在后台合并和数据变异之后,旧的数据片段会在一定时间后被移除 (`old_parts_lifetime`)。在这期间,他们不能被移动到其他的卷或磁盘。也就是说,直到数据片段被完全移除,它们仍然会被磁盘占用空间计算在内。 + +## 使用S3进行数据存储 {#using-s3-data-storage} + +`MergeTree`系列表引擎允许使用[S3](https://aws.amazon.com/s3/)存储数据,需要修改磁盘类型为`S3`。 + +示例配置: + +``` xml + + ... + + + s3 + https://storage.yandexcloud.net/my-bucket/root-path/ + your_access_key_id + your_secret_access_key + + your_base64_encoded_customer_key + + http://proxy1 + http://proxy2 + + 10000 + 5000 + 10 + 4 + 1000 + /var/lib/clickhouse/disks/s3/ + false + + + ... 
+
+Required parameters:
+
+- `endpoint` - S3 endpoint URL, in either `path` or `virtual hosted` [style](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html).
+- `access_key_id` - S3 access key id.
+- `secret_access_key` - S3 secret access key.
+
+Optional parameters:
+
+- `region` - S3 region name.
+- `use_environment_credentials` - Read credentials from the environment variables AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN. Default value is `false`.
+- `use_insecure_imds_request` - If set to `true`, the S3 client uses an insecure IMDS request while obtaining credentials. Default value is `false`.
+- `proxy` - Proxy configuration for access to the S3 endpoint. Each `uri` value must be a valid proxy URL.
+- `connect_timeout_ms` - Socket connect timeout in milliseconds. Default value is `10000` (10 seconds).
+- `request_timeout_ms` - Request timeout in milliseconds. Default value is `5000` (5 seconds).
+- `retry_attempts` - Number of retry attempts for a failed request. Default value is `10`.
+- `single_read_retries` - Number of retry attempts when the connection is dropped during a read. Default value is `4`.
+- `min_bytes_for_seek` - Minimal number of bytes to use a seek operation instead of a sequential read. Default value is `1000`.
+- `metadata_path` - Path on the local filesystem where metadata files for S3 are stored. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
+- `skip_access_check` - If `true`, ClickHouse does not check whether the disk is accessible at startup. Default value is `false`.
+- `server_side_encryption_customer_key_base64` - If specified, the headers required to access SSE-C encrypted data in S3 are added to requests.
+
+An S3 disk can also be used in a hot/cold storage setup:
+```xml
+<storage_configuration>
+    ...
+    <disks>
+        <s3>
+            <type>s3</type>
+            <endpoint>https://storage.yandexcloud.net/my-bucket/root-path/</endpoint>
+            <access_key_id>your_access_key_id</access_key_id>
+            <secret_access_key>your_secret_access_key</secret_access_key>
+        </s3>
+    </disks>
+    <policies>
+        <s3_main>
+            <volumes>
+                <main>
+                    <disk>s3</disk>
+                </main>
+            </volumes>
+        </s3_main>
+        <s3_cold>
+            <volumes>
+                <main>
+                    <disk>default</disk>
+                </main>
+                <external>
+                    <disk>s3</disk>
+                </external>
+            </volumes>
+            <move_factor>0.2</move_factor>
+        </s3_cold>
+    </policies>
+    ...
+</storage_configuration>
+```
+
+With the `cold` option, data is moved to S3 when the free space on the local disk drops below `move_factor * disk_size`, or when a TTL move rule applies.
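+
+For illustration only, a table can then be attached to one of these policies. This is a minimal sketch; the policy name `s3_cold` is taken from the example configuration above and must match whatever policy your server actually defines:
+
+``` sql
+-- Hypothetical table that stores its parts according to the S3-backed `s3_cold` policy.
+CREATE TABLE s3_cold_table
+(
+    EventDate Date,
+    Value UInt64
+)
+ENGINE = MergeTree
+ORDER BY (EventDate, Value)
+PARTITION BY toYYYYMM(EventDate)
+SETTINGS storage_policy = 's3_cold';
+```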
+
+## Virtual Columns {#virtual-columns}
+
+- `_part` - Name of the data part.
+- `_part_index` - Sequential index of the data part in the query result.
+- `_partition_id` - Name of the partition.
+- `_part_uuid` - Unique part identifier (if the MergeTree setting `assign_part_uuids` is enabled).
+- `_partition_value` - Values (a tuple) of the `partition by` expression.
+- `_sample_factor` - Sample factor (from the query).
diff --git a/docs/zh/faq/general/dbms-naming.md b/docs/zh/faq/general/dbms-naming.md
index e732c2f054e..f24b3134093 100644
--- a/docs/zh/faq/general/dbms-naming.md
+++ b/docs/zh/faq/general/dbms-naming.md
@@ -1,18 +1,18 @@
----
+---
 slug: /zh/faq/general/dbms-naming
-title: "\u201CClickHouse\u201D 有什么含义?"
-toc_hidden: true
-sidebar_position: 10
----
-
-# “ClickHouse” 有什么含义? {#what-does-clickhouse-mean}
-
-它是“**点击**流”和“数据**仓库**”的组合。它来自于Yandex最初的用例。在Metrica网站上,ClickHouse本应该保存人们在互联网上的所有点击记录,现在它仍然在做这项工作。你可以在[ClickHouse history](../../introduction/history.md)页面上阅读更多关于这个用例的信息。
-
-这个由两部分组成的意思有两个结果:
-
-- 唯一正确的写“Click**H**ouse”的方式是用大写H。
-- 如果需要缩写,请使用“**CH**”。由于一些历史原因,缩写CK在中国也很流行,主要是因为中文中最早的一个关于ClickHouse的演讲使用了这种形式。
-
-!!! info “有趣的事实”
-    多年后ClickHouse闻名于世, 这种命名方法:结合各有深意的两个词被赞扬为最好的数据库命名方式, 卡内基梅隆大学数据库副教授[Andy Pavlo做的研究](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html) 。ClickHouse与Postgres共同获得“史上最佳数据库名”奖。
+title: "\u201CClickHouse\u201D 有什么含义?"
+toc_hidden: true
+sidebar_position: 10
+---
+
+# “ClickHouse” 有什么含义? {#what-does-clickhouse-mean}
+
+它是“**点击**流”和“数据**仓库**”的组合。它来自于Yandex最初的用例。在Metrica网站上,ClickHouse本应该保存人们在互联网上的所有点击记录,现在它仍然在做这项工作。你可以在[ClickHouse history](../../introduction/history.md)页面上阅读更多关于这个用例的信息。
+
+这个由两部分组成的意思有两个结果:
+
+- 唯一正确的写“Click**H**ouse”的方式是用大写H。
+- 如果需要缩写,请使用“**CH**”。由于一些历史原因,缩写CK在中国也很流行,主要是因为中文中最早的一个关于ClickHouse的演讲使用了这种形式。
+
+!!! info “有趣的事实”
+    多年后ClickHouse闻名于世, 这种命名方法:结合各有深意的两个词被赞扬为最好的数据库命名方式, 卡内基梅隆大学数据库副教授[Andy Pavlo做的研究](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html) 。ClickHouse与Postgres共同获得“史上最佳数据库名”奖。
diff --git a/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md b/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md
index daa7abf525f..16f48baf7ef 100644
--- a/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md
+++ b/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md
@@ -1,18 +1,18 @@
----
+---
 slug: /zh/faq/general/how-do-i-contribute-code-to-clickhouse
-title: 我如何为ClickHouse贡献代码?
-toc_hidden: true
-sidebar_position: 120
----
-
-# 我如何为ClickHouse贡献代码? {#how-do-i-contribute-code-to-clickhouse}
-
-ClickHouse是一个开源项目[在GitHub上开发](https://github.com/ClickHouse/ClickHouse)。
-
-按照惯例,贡献指南发布在源代码库根目录的 [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md)文件中。
-
-如果你想对ClickHouse提出实质性的改变建议,可以考虑[在GitHub上发布一个问题](https://github.com/ClickHouse/ClickHouse/issues/new/choose),解释一下你想做什么,先与维护人员和社区讨论一下。[此类RFC问题的例子](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc)。
-
-如果您的贡献与安全相关,也请查看[我们的安全政策](https://github.com/ClickHouse/ClickHouse/security/policy/)。
-
-
+title: 我如何为ClickHouse贡献代码?
+toc_hidden: true
+sidebar_position: 120
+---
+
+# 我如何为ClickHouse贡献代码?
{#how-do-i-contribute-code-to-clickhouse} + +ClickHouse是一个开源项目[在GitHub上开发](https://github.com/ClickHouse/ClickHouse)。 + +按照惯例,贡献指南发布在源代码库根目录的 [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md)文件中。 + +如果你想对ClickHouse提出实质性的改变建议,可以考虑[在GitHub上发布一个问题](https://github.com/ClickHouse/ClickHouse/issues/new/choose),解释一下你想做什么,先与维护人员和社区讨论一下。[此类RFC问题的例子](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc)。 + +如果您的贡献与安全相关,也请查看[我们的安全政策](https://github.com/ClickHouse/ClickHouse/security/policy/)。 + + diff --git a/docs/zh/faq/integration/index.md b/docs/zh/faq/integration/index.md index 3a3f97761f3..b0ca2d05c05 100644 --- a/docs/zh/faq/integration/index.md +++ b/docs/zh/faq/integration/index.md @@ -1,22 +1,22 @@ ---- -slug: /zh/faq/integration/ -title: 关于集成ClickHouse和其他系统的问题 -toc_hidden_folder: true -sidebar_position: 4 -sidebar_label: Integration ---- - -# 关于集成ClickHouse和其他系统的问题 {#question-about-integrating-clickhouse-and-other-systems} - -问题: - -- [如何从 ClickHouse 导出数据到一个文件?](../../faq/integration/file-export.md) -- [如何导入JSON到ClickHouse?](../../faq/integration/json-import.md) -- [如果我用ODBC链接Oracle数据库出现编码问题该怎么办?](../../faq/integration/oracle-odbc.md) - - - -!!! info "没看到你要找的东西吗?" - 查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。 - +--- +slug: /zh/faq/integration/ +title: 关于集成ClickHouse和其他系统的问题 +toc_hidden_folder: true +sidebar_position: 4 +sidebar_label: Integration +--- + +# 关于集成ClickHouse和其他系统的问题 {#question-about-integrating-clickhouse-and-other-systems} + +问题: + +- [如何从 ClickHouse 导出数据到一个文件?](../../faq/integration/file-export.md) +- [如何导入JSON到ClickHouse?](../../faq/integration/json-import.md) +- [如果我用ODBC链接Oracle数据库出现编码问题该怎么办?](../../faq/integration/oracle-odbc.md) + + + +!!! info "没看到你要找的东西吗?" + 查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。 + {## [原文](https://clickhouse.com/docs/en/faq/integration/) ##} \ No newline at end of file diff --git a/docs/zh/faq/operations/index.md b/docs/zh/faq/operations/index.md index 153eda6199a..1fe84655ada 100644 --- a/docs/zh/faq/operations/index.md +++ b/docs/zh/faq/operations/index.md @@ -1,21 +1,21 @@ ---- -slug: /zh/faq/operations/ -title: 关于操作ClickHouse服务器和集群的问题 -toc_hidden_folder: true -sidebar_position: 3 -sidebar_label: Operations ---- - -# 关于操作ClickHouse服务器和集群的问题 {#question-about-operating-clickhouse-servers-and-clusters} - -问题: - -- [如果想在生产环境部署,需要用哪个版本的 ClickHouse 呢?](../../faq/operations/production.md) -- [是否可能从 ClickHouse 数据表中删除所有旧的数据记录?](../../faq/operations/delete-old-data.md) -- [ClickHouse支持多区域复制吗?](../../faq/operations/multi-region-replication.md) - - -!!! info "没看到你要找的东西吗?" - 查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。 - -{## [原文](https://clickhouse.com/docs/en/faq/production/) ##} +--- +slug: /zh/faq/operations/ +title: 关于操作ClickHouse服务器和集群的问题 +toc_hidden_folder: true +sidebar_position: 3 +sidebar_label: Operations +--- + +# 关于操作ClickHouse服务器和集群的问题 {#question-about-operating-clickhouse-servers-and-clusters} + +问题: + +- [如果想在生产环境部署,需要用哪个版本的 ClickHouse 呢?](../../faq/operations/production.md) +- [是否可能从 ClickHouse 数据表中删除所有旧的数据记录?](../../faq/operations/delete-old-data.md) +- [ClickHouse支持多区域复制吗?](../../faq/operations/multi-region-replication.md) + + +!!! info "没看到你要找的东西吗?" 
+ 查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。 + +{## [原文](https://clickhouse.com/docs/en/faq/production/) ##} diff --git a/docs/zh/faq/operations/multi-region-replication.md b/docs/zh/faq/operations/multi-region-replication.md index 05f856a9ea7..14df8b72eff 100644 --- a/docs/zh/faq/operations/multi-region-replication.md +++ b/docs/zh/faq/operations/multi-region-replication.md @@ -1,15 +1,15 @@ ---- +--- slug: /zh/faq/operations/multi-region-replication -title: ClickHouse支持多区域复制吗? -toc_hidden: true -sidebar_position: 30 ---- - -# ClickHouse支持多区域复制吗? {#does-clickhouse-support-multi-region-replication} - -简短的回答是“是的”。然而,我们建议将所有区域/数据中心之间的延迟保持在两位数字范围内,否则,在通过分布式共识协议时,写性能将受到影响。例如,美国海岸之间的复制可能会很好,但美国和欧洲之间就不行。 - -在配置方面,这与单区域复制没有区别,只是使用位于不同位置的主机作为副本。 - -更多信息,请参见[关于数据复制的完整文章](../../engines/table-engines/mergetree-family/replication.md)。 - +title: ClickHouse支持多区域复制吗? +toc_hidden: true +sidebar_position: 30 +--- + +# ClickHouse支持多区域复制吗? {#does-clickhouse-support-multi-region-replication} + +简短的回答是“是的”。然而,我们建议将所有区域/数据中心之间的延迟保持在两位数字范围内,否则,在通过分布式共识协议时,写性能将受到影响。例如,美国海岸之间的复制可能会很好,但美国和欧洲之间就不行。 + +在配置方面,这与单区域复制没有区别,只是使用位于不同位置的主机作为副本。 + +更多信息,请参见[关于数据复制的完整文章](../../engines/table-engines/mergetree-family/replication.md)。 + diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index 86e205ea401..1874970ac95 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -1203,7 +1203,7 @@ ClickHouse生成异常 - 类型:布尔 - 默认值:True -启用数据格式的保序并行分析。 仅支持TSV,TKSV,CSV和JSONEachRow格式。 +启用数据格式的保序并行分析。 仅支持TSV,TSKV,CSV和JSONEachRow格式。 ## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing} diff --git a/docs/zh/operations/system-tables/storage_policies.md b/docs/zh/operations/system-tables/storage_policies.md index e29915a98da..27219f789f4 100644 --- a/docs/zh/operations/system-tables/storage_policies.md +++ b/docs/zh/operations/system-tables/storage_policies.md @@ -13,6 +13,5 @@ slug: /zh/operations/system-tables/storage_policies - `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — 存储策略中定义的磁盘名。 - `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 可以存储在卷磁盘上数据部分的最大大小 (0 - 不限制)。 - `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — 磁盘空闲的比率。当比率超过配置的值,ClickHouse 将把数据向下一个卷移动。 -- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 设置中 `prefer_not_to_merge` 的值. 当这个设置启用时,不允许在此卷上合并数据。这将允许控制 ClickHouse 如何与运行速度较慢的磁盘一起工作。 如果存储策略包含多个卷,则每个卷的信息将在表中作为单独一行存储。 diff --git a/docs/zh/sql-reference/statements/select/order-by.md b/docs/zh/sql-reference/statements/select/order-by.md index 01f702a4b1e..3286fc9f9e7 100644 --- a/docs/zh/sql-reference/statements/select/order-by.md +++ b/docs/zh/sql-reference/statements/select/order-by.md @@ -61,6 +61,22 @@ sidebar_label: ORDER BY 我们只建议使用 `COLLATE` 对于少量行的最终排序,因为排序与 `COLLATE` 比正常的按字节排序效率低。 +## ORDER BY ALL + +`ORDER BY ALL` 对所有选定的列进行升序排序。 + +示例: + +``` sql +SELECT a, b, c FROM t ORDER BY ALL +``` + +等同于: + +``` sql +SELECT a, b, c FROM t ORDER BY a, b, c +``` + ## 实现细节 {#implementation-details} 更少的RAM使用,如果一个足够小 [LIMIT](../../../sql-reference/statements/select/limit.md) 除了指定 `ORDER BY`. 
否则,所花费的内存量与用于排序的数据量成正比。 对于分布式查询处理,如果 [GROUP BY](../../../sql-reference/statements/select/group-by.md) 省略排序,在远程服务器上部分完成排序,并将结果合并到请求者服务器上。 这意味着对于分布式排序,要排序的数据量可以大于单个服务器上的内存量。 diff --git a/docs/zh/sql-reference/table-functions/s3.md b/docs/zh/sql-reference/table-functions/s3.md index a62fa9ebb19..f7384a7526e 100644 --- a/docs/zh/sql-reference/table-functions/s3.md +++ b/docs/zh/sql-reference/table-functions/s3.md @@ -11,7 +11,7 @@ sidebar_label: s3 **语法** ``` sql -s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +s3(path [,access_key_id, secret_access_key [,session_token]] ,format, structure, [compression]) ``` **参数** diff --git a/programs/bash-completion/completions/ch b/programs/bash-completion/completions/ch new file mode 120000 index 00000000000..7101fd9ed04 --- /dev/null +++ b/programs/bash-completion/completions/ch @@ -0,0 +1 @@ +clickhouse \ No newline at end of file diff --git a/programs/bash-completion/completions/chc b/programs/bash-completion/completions/chc new file mode 100644 index 00000000000..0e34cd4eab2 --- /dev/null +++ b/programs/bash-completion/completions/chc @@ -0,0 +1,2 @@ +[[ -v $_CLICKHOUSE_COMPLETION_LOADED ]] || source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap" +_complete_clickhouse_generic chc diff --git a/programs/bash-completion/completions/chl b/programs/bash-completion/completions/chl new file mode 100644 index 00000000000..6d0338bf122 --- /dev/null +++ b/programs/bash-completion/completions/chl @@ -0,0 +1,2 @@ +[[ -v $_CLICKHOUSE_COMPLETION_LOADED ]] || source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap" +_complete_clickhouse_generic chl diff --git a/programs/bash-completion/completions/clickhouse b/programs/bash-completion/completions/clickhouse index fc55398dcf1..ff0a60c60be 100644 --- a/programs/bash-completion/completions/clickhouse +++ b/programs/bash-completion/completions/clickhouse @@ -31,3 +31,4 @@ function _complete_for_clickhouse_entrypoint_bin() } _complete_clickhouse_generic clickhouse _complete_for_clickhouse_entrypoint_bin +_complete_clickhouse_generic ch _complete_for_clickhouse_entrypoint_bin diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index d6b8b38d84d..59fc6c0c17f 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -35,7 +35,6 @@ #include #include #include -#include /** A tool for evaluating ClickHouse performance. 
@@ -405,7 +404,7 @@ private: || sigaddset(&sig_set, SIGINT) || pthread_sigmask(SIG_BLOCK, &sig_set, nullptr)) { - throwFromErrno("Cannot block signal.", ErrorCodes::CANNOT_BLOCK_SIGNAL); + throw ErrnoException(ErrorCodes::CANNOT_BLOCK_SIGNAL, "Cannot block signal"); } while (true) diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index 8f24d13d379..e3371185aad 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -159,6 +160,7 @@ void ClusterCopierApp::mainImpl() registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); + registerDatabases(); registerStorages(); registerDictionaries(); registerDisks(/* global_skip_access_check= */ true); diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index 421e4038d12..f176fa277d7 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -36,7 +36,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 2) @@ -51,8 +51,8 @@ public: const String & path_from = command_arguments[0]; const String & path_to = command_arguments[1]; - DiskPtr disk_from = global_context->getDisk(disk_name_from); - DiskPtr disk_to = global_context->getDisk(disk_name_to); + DiskPtr disk_from = disk_selector->get(disk_name_from); + DiskPtr disk_to = disk_selector->get(disk_name_to); String relative_path_from = validatePathAndGetAsRelative(path_from); String relative_path_to = validatePathAndGetAsRelative(path_to); diff --git a/programs/disks/CommandLink.cpp b/programs/disks/CommandLink.cpp index 357832865fb..dbaa3162f82 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -27,7 +27,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 2) @@ -41,7 +41,7 @@ public: const String & path_from = command_arguments[0]; const String & path_to = command_arguments[1]; - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path_from = validatePathAndGetAsRelative(path_from); String relative_path_to = validatePathAndGetAsRelative(path_to); diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index 48b54b70014..ea84cd0682d 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -33,7 +33,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 1) @@ -46,7 +46,7 @@ public: const String & path = command_arguments[0]; - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path = validatePathAndGetAsRelative(path); diff --git a/programs/disks/CommandListDisks.cpp b/programs/disks/CommandListDisks.cpp index 7b2fcd16107..79da021fd00 100644 --- a/programs/disks/CommandListDisks.cpp +++ b/programs/disks/CommandListDisks.cpp @@ -26,8 +26,8 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, - 
Poco::Util::LayeredConfiguration &) override + std::shared_ptr &, + Poco::Util::LayeredConfiguration & config) override { if (!command_arguments.empty()) { @@ -35,8 +35,29 @@ public: throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } - for (const auto & [disk_name, _] : global_context->getDisksMap()) - std::cout << disk_name << '\n'; + constexpr auto config_prefix = "storage_configuration.disks"; + constexpr auto default_disk_name = "default"; + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_prefix, keys); + + bool has_default_disk = false; + + /// For the output to be ordered + std::set disks; + + for (const auto & disk_name : keys) + { + if (disk_name == default_disk_name) + has_default_disk = true; + disks.insert(disk_name); + } + + if (!has_default_disk) + disks.insert(default_disk_name); + + for (const auto & disk : disks) + std::cout << disk << '\n'; } }; } diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index e5df982d896..6d33bdec498 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -34,7 +34,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 1) @@ -47,7 +47,7 @@ public: const String & path = command_arguments[0]; - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path = validatePathAndGetAsRelative(path); bool recursive = config.getBool("recursive", false); diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 654090b2138..75cf96252ed 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -26,7 +26,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 2) @@ -40,7 +40,7 @@ public: const String & path_from = command_arguments[0]; const String & path_to = command_arguments[1]; - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path_from = validatePathAndGetAsRelative(path_from); String relative_path_to = validatePathAndGetAsRelative(path_to); diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index b6cacdd2c61..85041faf22c 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -36,7 +36,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 1) @@ -47,7 +47,7 @@ public: String disk_name = config.getString("disk", "default"); - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path = validatePathAndGetAsRelative(command_arguments[0]); diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index ff8d4a1c6bb..0c631eacff3 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -26,7 +26,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if 
(command_arguments.size() != 1) @@ -39,7 +39,7 @@ public: const String & path = command_arguments[0]; - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path = validatePathAndGetAsRelative(path); diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp index d075daf3215..7ded37e067a 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -37,7 +37,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 1) @@ -50,7 +50,7 @@ public: const String & path = command_arguments[0]; - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path = validatePathAndGetAsRelative(path); diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index b81cd52f8c8..ded324fd0da 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -209,7 +209,35 @@ int DisksApp::main(const std::vector & /*args*/) po::parsed_options parsed = parser.run(); args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); } - command->execute(args, global_context, config()); + + std::unordered_set disks + { + config().getString("disk", "default"), + config().getString("disk-from", config().getString("disk", "default")), + config().getString("disk-to", config().getString("disk", "default")), + }; + + auto validator = [&disks]( + const Poco::Util::AbstractConfiguration & config, + const std::string & disk_config_prefix, + const std::string & disk_name) + { + if (!disks.contains(disk_name)) + return false; + + const auto disk_type = config.getString(disk_config_prefix + ".type", "local"); + + if (disk_type == "cache") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk type 'cache' of disk {} is not supported by clickhouse-disks", disk_name); + + return true; + }; + + constexpr auto config_prefix = "storage_configuration.disks"; + auto disk_selector = std::make_shared(); + disk_selector->initialize(config(), config_prefix, global_context, validator); + + command->execute(args, disk_selector, config()); return Application::EXIT_OK; } diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index de41eedec35..da106e1084e 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -25,7 +26,7 @@ public: virtual void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) = 0; const std::optional & getCommandOptions() const { return command_option_description; } diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index d7d61bbcd3b..05ba86069d7 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -24,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +34,9 @@ #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wmissing-declarations" +extern const char * auto_time_zones[]; + + namespace DB { namespace ErrorCodes @@ -126,6 +131,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); 
+ registerDatabases(); registerStorages(); registerFormats(); @@ -133,9 +139,25 @@ int mainEntryClickHouseFormat(int argc, char ** argv) auto all_known_storage_names = StorageFactory::instance().getAllRegisteredNames(); auto all_known_data_type_names = DataTypeFactory::instance().getAllRegisteredNames(); + auto all_known_settings = Settings().getAllRegisteredNames(); + auto all_known_merge_tree_settings = MergeTreeSettings().getAllRegisteredNames(); additional_names.insert(all_known_storage_names.begin(), all_known_storage_names.end()); additional_names.insert(all_known_data_type_names.begin(), all_known_data_type_names.end()); + additional_names.insert(all_known_settings.begin(), all_known_settings.end()); + additional_names.insert(all_known_merge_tree_settings.begin(), all_known_merge_tree_settings.end()); + + for (auto * it = auto_time_zones; *it; ++it) + { + String time_zone_name = *it; + + /// Example: Europe/Amsterdam + Strings split; + boost::split(split, time_zone_name, [](char c){ return c == '/'; }); + for (const auto & word : split) + if (!word.empty()) + additional_names.insert(word); + } KnownIdentifierFunc is_known_identifier = [&](std::string_view name) { diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 9d4d791263b..52f30098b38 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -328,7 +328,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) fs::create_symlink(binary_self_canonical_path, main_bin_path); if (0 != chmod(binary_self_canonical_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) - throwFromErrno(fmt::format("Cannot chmod {}", binary_self_canonical_path.string()), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot chmod {}", binary_self_canonical_path.string()); } } else @@ -361,7 +361,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (already_installed) { if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) - throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot chmod {}", main_bin_path.string()); } else { @@ -395,7 +395,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) } if (0 != chmod(destination.c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) - throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot chmod {}", main_bin_tmp_path.string()); } catch (const Exception & e) { @@ -1122,7 +1122,7 @@ namespace return 0; } else - throwFromErrno(fmt::format("Cannot obtain the status of pid {} with `kill`", pid), ErrorCodes::CANNOT_KILL); + throw ErrnoException(ErrorCodes::CANNOT_KILL, "Cannot obtain the status of pid {} with `kill`", pid); } if (!pid) @@ -1143,7 +1143,7 @@ namespace if (0 == kill(pid, signal)) fmt::print("Sent {} signal to process with pid {}.\n", signal_name, pid); else - throwFromErrno(fmt::format("Cannot send {} signal", signal_name), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot send {} signal", signal_name); size_t try_num = 0; for (; try_num < max_tries; ++try_num) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index cdb1d89b18e..b8f538f821c 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -68,6 +68,7 @@ if 
(BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperReadinessHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnection.cpp @@ -114,6 +115,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/LocalDirectorySyncGuard.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/TemporaryFileOnDisk.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/loadLocalDiskConfig.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskType.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/IObjectStorage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index e04e669abae..48d26233d94 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #include "Core/Defines.h" #include "config.h" @@ -289,6 +291,33 @@ try if (!config().has("keeper_server")) throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Keeper configuration ( section) not found in config"); + auto updateMemorySoftLimitInConfig = [&](Poco::Util::AbstractConfiguration & config) + { + UInt64 memory_soft_limit = 0; + if (config.has("keeper_server.max_memory_usage_soft_limit")) + { + memory_soft_limit = config.getUInt64("keeper_server.max_memory_usage_soft_limit"); + } + + /// if memory soft limit is not set, we will use default value + if (memory_soft_limit == 0) + { + Float64 ratio = 0.9; + if (config.has("keeper_server.max_memory_usage_soft_limit_ratio")) + ratio = config.getDouble("keeper_server.max_memory_usage_soft_limit_ratio"); + + size_t physical_server_memory = getMemoryAmount(); + if (ratio > 0 && physical_server_memory > 0) + { + memory_soft_limit = static_cast(physical_server_memory * ratio); + config.setUInt64("keeper_server.max_memory_usage_soft_limit", memory_soft_limit); + } + } + LOG_INFO(log, "keeper_server.max_memory_usage_soft_limit is set to {}", formatReadableSizeWithBinarySuffix(memory_soft_limit)); + }; + + updateMemorySoftLimitInConfig(config()); + std::string path; if (config().has("keeper_server.storage_path")) @@ -328,6 +357,13 @@ try config().getUInt("max_thread_pool_free_size", 1000), config().getUInt("thread_pool_queue_size", 10000) ); + /// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed). 
+ SCOPE_EXIT({ + Stopwatch watch; + LOG_INFO(log, "Waiting for background threads"); + GlobalThreadPool::instance().shutdown(); + LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds()); + }); static ServerErrorHandler error_handler; Poco::ErrorHandler::set(&error_handler); @@ -459,6 +495,29 @@ try std::make_unique( std::move(my_http_context), createPrometheusMainHandlerFactory(*this, config_getter(), async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); }); + + /// HTTP control endpoints + port_name = "keeper_server.http_control.port"; + createServer(listen_host, port_name, listen_try, [&](UInt16 port) mutable + { + auto my_http_context = httpContext(); + Poco::Timespan my_keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr my_http_params = new Poco::Net::HTTPServerParams; + my_http_params->setTimeout(my_http_context->getReceiveTimeout()); + my_http_params->setKeepAliveTimeout(my_keep_alive_timeout); + + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(my_http_context->getReceiveTimeout()); + socket.setSendTimeout(my_http_context->getSendTimeout()); + servers->emplace_back( + listen_host, + port_name, + "HTTP Control: http://" + address.toString(), + std::make_unique( + std::move(my_http_context), createKeeperHTTPControlMainHandlerFactory(config_getter(), global_context->getKeeperDispatcher(), "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params) + ); + }); } for (auto & server : *servers) @@ -492,6 +551,8 @@ try { updateLevels(*config, logger()); + updateMemorySoftLimitInConfig(*config); + if (config->has("keeper_server")) global_context->updateKeeperConfiguration(*config); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index fbb64ea1135..ccd3d84630f 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -43,7 +44,7 @@ #include #include #include -#include +#include #include #include #include @@ -489,6 +490,7 @@ try registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); + registerDatabases(); registerStorages(); registerDictionaries(); registerDisks(/* global_skip_access_check= */ true); @@ -726,12 +728,7 @@ void LocalServer::processConfig() /// We load temporary database first, because projections need it. DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase(); - /** Init dummy default DB - * NOTE: We force using isolated default database to avoid conflicts with default database from server environment - * Otherwise, metadata of temporary File(format, EXPLICIT_PATH) tables will pollute metadata/ directory; - * if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons. 
- */ - std::string default_database = config().getString("default_database", "_local"); + std::string default_database = config().getString("default_database", "default"); DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context)); global_context->setCurrentDatabase(default_database); @@ -744,7 +741,7 @@ void LocalServer::processConfig() LOG_DEBUG(log, "Loading metadata from {}", path); auto startup_system_tasks = loadMetadataSystem(global_context); - attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE)); + attachSystemTablesServer(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE), false); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA)); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); waitLoad(TablesLoaderForegroundPoolId, startup_system_tasks); @@ -757,13 +754,13 @@ void LocalServer::processConfig() } /// For ClickHouse local if path is not set the loader will be disabled. - global_context->getUserDefinedSQLObjectsLoader().loadObjects(); + global_context->getUserDefinedSQLObjectsStorage().loadObjects(); LOG_DEBUG(log, "Loaded metadata."); } else if (!config().has("no-system-tables")) { - attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE)); + attachSystemTablesServer(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE), false); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA)); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); } @@ -776,6 +773,7 @@ void LocalServer::processConfig() global_context->setQueryKindInitial(); global_context->setQueryKind(query_kind); + global_context->setQueryParameters(query_parameters); } @@ -822,6 +820,7 @@ void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & o std::cout << getHelpHeader() << "\n"; std::cout << options_description.main_description.value() << "\n"; std::cout << getHelpFooter() << "\n"; + std::cout << "In addition, --param_name=value can be specified for substitution of parameters for parametrized queries.\n"; #endif } @@ -898,7 +897,31 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum for (int arg_num = 1; arg_num < argc; ++arg_num) { std::string_view arg = argv[arg_num]; - if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-')) + /// Parameter arg after underline. 
+ if (arg.starts_with("--param_")) + { + auto param_continuation = arg.substr(strlen("--param_")); + auto equal_pos = param_continuation.find_first_of('='); + + if (equal_pos == std::string::npos) + { + /// param_name value + ++arg_num; + if (arg_num >= argc) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter requires value"); + arg = argv[arg_num]; + query_parameters.emplace(String(param_continuation), String(arg)); + } + else + { + if (equal_pos == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter name cannot be empty"); + + /// param_name=value + query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1)); + } + } + else if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-')) { /// Transform the abbreviated syntax '--multiquery ' into the full syntax '--multiquery -q ' ++arg_num; diff --git a/programs/main.cpp b/programs/main.cpp index 959984d565d..7d07112de66 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -158,7 +158,6 @@ std::pair clickhouse_applications[] = std::pair clickhouse_short_names[] = { #if ENABLE_CLICKHOUSE_LOCAL - {"ch", "local"}, {"chl", "local"}, #endif #if ENABLE_CLICKHOUSE_CLIENT @@ -502,6 +501,17 @@ int main(int argc_, char ** argv_) } } + /// Interpret binary without argument or with arguments starts with dash + /// ('-') as clickhouse-local for better usability: + /// + /// clickhouse # dumps help + /// clickhouse -q 'select 1' # use local + /// clickhouse # spawn local + /// clickhouse local # spawn local + /// + if (main_func == printHelp && !argv.empty() && (argv.size() == 1 || argv[1][0] == '-')) + main_func = mainEntryClickHouseLocal; + return main_func(static_cast(argv.size()), argv.data()); } #endif diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 2cb5250cdf2..7e09d5e8046 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -1307,7 +1307,7 @@ try /// stdin must be seekable auto res = lseek(file->getFD(), 0, SEEK_SET); if (-1 == res) - throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw ErrnoException(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Input must be seekable file (it will be read twice)"); SingleReadBufferIterator read_buffer_iterator(std::move(file)); schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, false, context_const); @@ -1336,7 +1336,7 @@ try /// stdin must be seekable auto res = lseek(file_in.getFD(), 0, SEEK_SET); if (-1 == res) - throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw ErrnoException(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Input must be seekable file (it will be read twice)"); } Obfuscator obfuscator(header, seed, markov_model_params); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 36f0ce90e57..926e57070f3 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -66,11 +67,12 @@ #include #include #include -#include +#include #include #include #include #include +#include #include #include #include @@ -92,6 +94,7 @@ #include #include #include +#include #include #include #include @@ -646,6 +649,7 @@ try registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); + registerDatabases(); registerStorages(); registerDictionaries(); 
registerDisks(/* global_skip_access_check= */ false); @@ -657,6 +661,11 @@ try CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); + Poco::ThreadPool server_pool(3, server_settings.max_connections); + std::mutex servers_lock; + std::vector servers; + std::vector servers_to_start_before_tables; + /** Context contains all that query execution is dependent: * settings, available functions, data types, aggregate functions, databases, ... */ @@ -697,6 +706,68 @@ try server_settings.max_thread_pool_size, server_settings.max_thread_pool_free_size, server_settings.thread_pool_queue_size); + /// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed). + SCOPE_EXIT({ + Stopwatch watch; + LOG_INFO(log, "Waiting for background threads"); + GlobalThreadPool::instance().shutdown(); + LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds()); + }); + + /// NOTE: global context should be destroyed *before* GlobalThreadPool::shutdown() + /// Otherwise GlobalThreadPool::shutdown() will hang, since Context holds some threads. + SCOPE_EXIT({ + /** Ask to cancel background jobs all table engines, + * and also query_log. + * It is important to do early, not in destructor of Context, because + * table engines could use Context on destroy. + */ + LOG_INFO(log, "Shutting down storages."); + + global_context->shutdown(); + + LOG_DEBUG(log, "Shut down storages."); + + if (!servers_to_start_before_tables.empty()) + { + LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); + size_t current_connections = 0; + { + std::lock_guard lock(servers_lock); + for (auto & server : servers_to_start_before_tables) + { + server.stop(); + current_connections += server.currentConnections(); + } + } + + if (current_connections) + LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); + else + LOG_INFO(log, "Closed all listening sockets."); + + if (current_connections > 0) + current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, server_settings.shutdown_wait_unfinished); + + if (current_connections) + LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); + else + LOG_INFO(log, "Closed connections to servers for tables."); + } + + global_context->shutdownKeeperDispatcher(); + + /// Wait server pool to avoid use-after-free of destroyed context in the handlers + server_pool.joinAll(); + + /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available. + * At this moment, no one could own shared part of Context. + */ + global_context.reset(); + shared_context.reset(); + LOG_DEBUG(log, "Destroyed global context."); + }); + #if USE_AZURE_BLOB_STORAGE /// It makes sense to deinitialize libxml after joining of all threads @@ -755,10 +826,6 @@ try } } - Poco::ThreadPool server_pool(3, server_settings.max_connections); - std::mutex servers_lock; - std::vector servers; - std::vector servers_to_start_before_tables; /// This object will periodically calculate some metrics. 
ServerAsynchronousMetrics async_metrics( global_context, @@ -1280,9 +1347,10 @@ try global_context->setHTTPHeaderFilter(*config); global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop); - global_context->setClientHTTPHeaderForbiddenHeaders(server_settings_.get_client_http_header_forbidden_headers); - global_context->setAllowGetHTTPHeaderFunction(server_settings_.allow_get_client_http_header); global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop); + global_context->setMaxTableNumToWarn(server_settings_.max_table_num_to_warn); + global_context->setMaxDatabaseNumToWarn(server_settings_.max_database_num_to_warn); + global_context->setMaxPartNumToWarn(server_settings_.max_part_num_to_warn); ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited; if (server_settings_.concurrent_threads_soft_limit_num > 0 && server_settings_.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit) @@ -1385,8 +1453,6 @@ try global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config); - global_context->reloadQueryMaskingRulesIfChanged(config); - std::lock_guard lock(servers_lock); updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables); } @@ -1407,6 +1473,8 @@ try #endif NamedCollectionUtils::reloadFromConfig(*config); + FileCacheFactory::instance().updateSettingsFromConfig(*config); + ProfileEvents::increment(ProfileEvents::MainConfigLoads); /// Must be the last. @@ -1490,6 +1558,34 @@ try throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif }); + + /// HTTP control endpoints + port_name = "keeper_server.http_control.port"; + createServer(config(), listen_host, port_name, listen_try, /* start_server: */ false, + servers_to_start_before_tables, + [&](UInt16 port) -> ProtocolServerAdapter + { + auto http_context = httpContext(); + Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(http_context->getReceiveTimeout()); + http_params->setKeepAliveTimeout(keep_alive_timeout); + + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config(), socket, listen_host, port); + socket.setReceiveTimeout(http_context->getReceiveTimeout()); + socket.setSendTimeout(http_context->getSendTimeout()); + return ProtocolServerAdapter( + listen_host, + port_name, + "HTTP Control: http://" + address.toString(), + std::make_unique( + std::move(http_context), + createKeeperHTTPControlMainHandlerFactory( + config_getter(), + global_context->getKeeperDispatcher(), + "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params)); + }); } #else throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); @@ -1600,60 +1696,6 @@ try /// try set up encryption. There are some errors in config, error will be printed and server wouldn't start. CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs"); - SCOPE_EXIT({ - async_metrics.stop(); - - /** Ask to cancel background jobs all table engines, - * and also query_log. - * It is important to do early, not in destructor of Context, because - * table engines could use Context on destroy. 
- */ - LOG_INFO(log, "Shutting down storages."); - - global_context->shutdown(); - - LOG_DEBUG(log, "Shut down storages."); - - if (!servers_to_start_before_tables.empty()) - { - LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); - size_t current_connections = 0; - { - std::lock_guard lock(servers_lock); - for (auto & server : servers_to_start_before_tables) - { - server.stop(); - current_connections += server.currentConnections(); - } - } - - if (current_connections) - LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); - else - LOG_INFO(log, "Closed all listening sockets."); - - if (current_connections > 0) - current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, server_settings.shutdown_wait_unfinished); - - if (current_connections) - LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); - else - LOG_INFO(log, "Closed connections to servers for tables."); - - global_context->shutdownKeeperDispatcher(); - } - - /// Wait server pool to avoid use-after-free of destroyed context in the handlers - server_pool.joinAll(); - - /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available. - * At this moment, no one could own shared part of Context. - */ - global_context.reset(); - shared_context.reset(); - LOG_DEBUG(log, "Destroyed global context."); - }); - /// DNSCacheUpdater uses BackgroundSchedulePool which lives in shared context /// and thus this object must be created after the SCOPE_EXIT object where shared /// context is destroyed. @@ -1716,7 +1758,7 @@ try /// After loading validate that default database exists database_catalog.assertDatabaseExists(default_database); /// Load user-defined SQL functions. - global_context->getUserDefinedSQLObjectsLoader().loadObjects(); + global_context->getUserDefinedSQLObjectsStorage().loadObjects(); } catch (...) { @@ -2088,10 +2130,9 @@ void Server::createServers( { const Settings & settings = global_context->getSettingsRef(); - Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(keep_alive_timeout); + http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); Poco::Util::AbstractConfiguration::Keys protocols; config.keys("protocols", protocols); @@ -2345,10 +2386,9 @@ void Server::createInterserverServers( { const Settings & settings = global_context->getSettingsRef(); - Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(keep_alive_timeout); + http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); /// Now iterate over interserver_listen_hosts for (const auto & interserver_listen_host : interserver_listen_hosts) diff --git a/programs/server/binary.html b/programs/server/binary.html new file mode 100644 index 00000000000..988dd33a72a --- /dev/null +++ b/programs/server/binary.html @@ -0,0 +1,267 @@ + + + + + + ClickHouse Binary Viewer + + + + + +
+
+ + + + + diff --git a/programs/server/config.xml b/programs/server/config.xml index 688f0bf5645..1be20c5cad8 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -485,7 +485,6 @@ true - false round_robin diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index c62e0c98184..04fdfb2d3ca 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -455,6 +455,7 @@
+
@@ -720,7 +721,7 @@ function insertChart(i) { query_editor_confirm.addEventListener('click', editConfirm); /// Ctrl+Enter (or Cmd+Enter on Mac) will also confirm editing. - query_editor.addEventListener('keydown', e => { + query_editor.addEventListener('keydown', event => { if ((event.metaKey || event.ctrlKey) && (event.keyCode == 13 || event.keyCode == 10)) { editConfirm(); } @@ -895,7 +896,7 @@ document.getElementById('add').addEventListener('click', e => { }); document.getElementById('reload').addEventListener('click', e => { - reloadAll(false); + reloadAll(queries.length == 0); }); document.getElementById('search').addEventListener('click', e => { @@ -964,12 +965,10 @@ document.getElementById('mass-editor-textarea').addEventListener('input', e => { function legendAsTooltipPlugin({ className, style = { background: "var(--legend-background)" } } = {}) { let legendEl; - let showTop = false; - const showLimit = 5; + let multiline; function init(u, opts) { legendEl = u.root.querySelector(".u-legend"); - legendEl.classList.remove("u-inline"); className && legendEl.classList.add(className); @@ -985,18 +984,19 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend- ...style }); + const nodes = legendEl.querySelectorAll("th"); + for (let i = 0; i < nodes.length; i++) + nodes[i]._order = i; + if (opts.series.length == 2) { - const nodes = legendEl.querySelectorAll("th"); + multiline = false; for (let i = 0; i < nodes.length; i++) nodes[i].style.display = "none"; } else { + multiline = true; legendEl.querySelector("th").remove(); legendEl.querySelector("td").setAttribute('colspan', '2'); legendEl.querySelector("td").style.textAlign = 'center'; - } - - if (opts.series.length - 1 > showLimit) { - showTop = true; let footer = legendEl.insertRow().insertCell(); footer.setAttribute('colspan', '2'); footer.style.textAlign = 'center'; @@ -1023,18 +1023,20 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend- left -= legendEl.clientWidth / 2; top -= legendEl.clientHeight / 2; legendEl.style.transform = "translate(" + left + "px, " + top + "px)"; - if (showTop) { + + if (multiline) { let nodes = nodeListToArray(legendEl.querySelectorAll("tr")); let header = nodes.shift(); let footer = nodes.pop(); - nodes.forEach(function (node) { node._sort_key = +node.querySelector("td").textContent; }); - nodes.sort((a, b) => +b._sort_key - +a._sort_key); + let showLimit = Math.floor(u.height / 30); + nodes.forEach(function (node) { node._sort_key = nodes.length > showLimit ? +node.querySelector("td").textContent.replace(/,/g,'') : node._order; }); + nodes.sort((a, b) => b._sort_key - a._sort_key); nodes.forEach(function (node) { node.parentNode.appendChild(node); }); for (let i = 0; i < nodes.length; i++) { nodes[i].style.display = i < showLimit ? null : "none"; - delete nodes[i]._sort_key; } footer.parentNode.appendChild(footer); + footer.style.display = nodes.length > showLimit ? 
null : "none"; } } @@ -1291,6 +1293,7 @@ async function drawAll() { document.getElementById('add').style.display = 'inline-block'; document.getElementById('edit').style.display = 'inline-block'; document.getElementById('search-span').style.display = ''; + hideError(); } else { const charts = document.getElementById('charts') @@ -1317,9 +1320,11 @@ function disableButtons() { reloadButton.classList.add('disabled'); const runButton = document.getElementById('run'); - runButton.value = 'Reloading…'; - runButton.disabled = true; - runButton.classList.add('disabled'); + if (runButton) { + runButton.value = 'Reloading…'; + runButton.disabled = true; + runButton.classList.add('disabled'); + } const searchButton = document.getElementById('search'); searchButton.value = '…'; @@ -1334,9 +1339,11 @@ function enableButtons() { reloadButton.classList.remove('disabled'); const runButton = document.getElementById('run'); - runButton.value = 'Ok'; - runButton.disabled = false; - runButton.classList.remove('disabled'); + if (runButton) { + runButton.value = 'Ok'; + runButton.disabled = false; + runButton.classList.remove('disabled'); + } const searchButton = document.getElementById('search'); searchButton.value = '🔎'; @@ -1359,14 +1366,17 @@ async function reloadAll(do_search) { } await drawAll(); } catch (e) { - showError(e.toString()); + showError(e.message); } enableButtons(); } document.getElementById('params').onsubmit = function(event) { - let do_search = document.activeElement === document.getElementById('search-query'); - reloadAll(do_search); + if (document.activeElement === document.getElementById('search-query')) { + reloadAll(true); + } else { + reloadAll(queries.length == 0); + } event.preventDefault(); } @@ -1405,13 +1415,15 @@ function refreshCustomized(value) { document.getElementById('search-span').style.opacity = customized ? 
0.5 : 1.0; } -function regenerate() { +function updateFromState() { document.getElementById('url').value = host; document.getElementById('user').value = user; document.getElementById('password').value = password; document.getElementById('search-query').value = search_query; refreshCustomized(); +} +function regenerate() { findParamsInQueries(); buildParams(); @@ -1430,7 +1442,7 @@ function regenerate() { window.onpopstate = function(event) { if (!event.state) { return; } ({host, user, queries, params, search_query, customized} = event.state); - + updateFromState(); regenerate(); drawAll(); }; @@ -1447,6 +1459,7 @@ if (window.location.hash) { async function start() { try { + updateFromState(); if (queries.length == 0) { await searchQueries(); } else { @@ -1460,7 +1473,7 @@ async function start() { drawAll(); } } catch (e) { - showError(e.toString()); + showError(e.message); } } diff --git a/programs/su/su.cpp b/programs/su/su.cpp index cebd05b3eb1..a8f61fb32b6 100644 --- a/programs/su/su.cpp +++ b/programs/su/su.cpp @@ -56,7 +56,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) group * result{}; if (0 != getgrnam_r(arg_gid.data(), &entry, buf.get(), buf_size, &result)) - throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name ({})", arg_gid), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot do 'getgrnam_r' to obtain gid from group name ({})", arg_gid); if (!result) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Group {} is not found in the system", arg_gid); @@ -68,7 +68,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Group has id 0, but dropping privileges to gid 0 does not make sense"); if (0 != setgid(gid)) - throwFromErrno(fmt::format("Cannot do 'setgid' to user ({})", arg_gid), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot do 'setgid' to user ({})", arg_gid); } if (!arg_uid.empty()) @@ -81,7 +81,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) passwd * result{}; if (0 != getpwnam_r(arg_uid.data(), &entry, buf.get(), buf_size, &result)) - throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name ({})", arg_uid), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot do 'getpwnam_r' to obtain uid from user name ({})", arg_uid); if (!result) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User {} is not found in the system", arg_uid); @@ -93,7 +93,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User has id 0, but dropping privileges to uid 0 does not make sense"); if (0 != setuid(uid)) - throwFromErrno(fmt::format("Cannot do 'setuid' to user ({})", arg_uid), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot do 'setuid' to user ({})", arg_uid); } } @@ -136,7 +136,7 @@ try execvp(new_argv.front(), new_argv.data()); - throwFromErrno("Cannot execvp", ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot execvp"); } catch (...) 
{ diff --git a/rust/BLAKE3/CMakeLists.txt b/rust/BLAKE3/CMakeLists.txt deleted file mode 100644 index ceb0a647b66..00000000000 --- a/rust/BLAKE3/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -clickhouse_import_crate(MANIFEST_PATH Cargo.toml) -target_include_directories(_ch_rust_blake3 INTERFACE include) -add_library(ch_rust::blake3 ALIAS _ch_rust_blake3) diff --git a/rust/BLAKE3/Cargo.toml b/rust/BLAKE3/Cargo.toml deleted file mode 100644 index ed414fa54c1..00000000000 --- a/rust/BLAKE3/Cargo.toml +++ /dev/null @@ -1,20 +0,0 @@ -[package] -name = "_ch_rust_blake3" -version = "0.1.0" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -blake3 = "1.2.0" -libc = "0.2.132" - -[lib] -crate-type = ["staticlib"] - -[profile.release] -debug = true - -[profile.release-thinlto] -inherits = "release" -# BLAKE3 module requires "full" LTO (not "thin") to get additional 10% performance benefit -lto = true diff --git a/rust/BLAKE3/include/blake3.h b/rust/BLAKE3/include/blake3.h deleted file mode 100644 index 5dc7d5bd902..00000000000 --- a/rust/BLAKE3/include/blake3.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef BLAKE3_H -#define BLAKE3_H - -#include - - -extern "C" { - -char *blake3_apply_shim(const char *begin, uint32_t _size, uint8_t *out_char_data); - -void blake3_free_char_pointer(char *ptr_to_free); - -} // extern "C" - -#endif /* BLAKE3_H */ diff --git a/rust/BLAKE3/src/lib.rs b/rust/BLAKE3/src/lib.rs deleted file mode 100644 index 011145d2f71..00000000000 --- a/rust/BLAKE3/src/lib.rs +++ /dev/null @@ -1,30 +0,0 @@ -extern crate blake3; -extern crate libc; - -use std::ffi::{CStr, CString}; -use std::os::raw::c_char; - -#[no_mangle] -pub unsafe extern "C" fn blake3_apply_shim( - begin: *const c_char, - _size: u32, - out_char_data: *mut u8, -) -> *mut c_char { - if begin.is_null() { - let err_str = CString::new("input was a null pointer").unwrap(); - return err_str.into_raw(); - } - let mut hasher = blake3::Hasher::new(); - let input_bytes = CStr::from_ptr(begin); - let input_res = input_bytes.to_bytes(); - hasher.update(input_res); - let mut reader = hasher.finalize_xof(); - reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN)); - std::ptr::null_mut() -} - -// Freeing memory according to docs: https://doc.rust-lang.org/std/ffi/struct.CString.html#method.into_raw -#[no_mangle] -pub unsafe extern "C" fn blake3_free_char_pointer(ptr_to_free: *mut c_char) { - std::mem::drop(CString::from_raw(ptr_to_free)); -} diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt index 6aa25e95679..66694ee16f8 100644 --- a/rust/CMakeLists.txt +++ b/rust/CMakeLists.txt @@ -14,6 +14,10 @@ macro(configure_rustc) set(RUST_CFLAGS "${RUST_CFLAGS} --sysroot ${CMAKE_SYSROOT}") endif() + if (USE_MUSL) + set(RUST_CXXFLAGS "${RUST_CXXFLAGS} -D_LIBCPP_HAS_MUSL_LIBC=1") + endif () + if(CCACHE_EXECUTABLE MATCHES "/sccache$") message(STATUS "Using RUSTC_WRAPPER: ${CCACHE_EXECUTABLE}") set(RUSTCWRAPPER "rustc-wrapper = \"${CCACHE_EXECUTABLE}\"") @@ -95,6 +99,5 @@ function(add_rust_subdirectory src) VERBATIM) endfunction() -add_rust_subdirectory (BLAKE3) add_rust_subdirectory (skim) add_rust_subdirectory (prql) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 04569cd3b3a..86bbec5579f 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -2,14 +2,6 @@ # It is not intended for manual editing. 
version = 3 -[[package]] -name = "_ch_rust_blake3" -version = "0.1.0" -dependencies = [ - "blake3", - "libc", -] - [[package]] name = "_ch_rust_prql" version = "0.1.0" @@ -30,9 +22,9 @@ dependencies = [ [[package]] name = "addr2line" -version = "0.20.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" dependencies = [ "gimli", ] @@ -45,24 +37,31 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "ahash" -version = "0.7.6" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" dependencies = [ - "getrandom", + "cfg-if", "once_cell", "version_check", + "zerocopy", ] [[package]] name = "aho-corasick" -version = "1.0.2" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -95,43 +94,43 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.1" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" +checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" [[package]] name = "anstyle-parse" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +checksum = "c677ab05e09154296dd37acecd46420c17b9713e8366facafa8fc0885167cf4c" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] name = "anyhow" -version = "1.0.72" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" dependencies = [ "backtrace", ] @@ -146,12 +145,6 @@ dependencies = [ "yansi", ] -[[package]] -name = "arrayref" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" - [[package]] name = 
"arrayvec" version = "0.7.4" @@ -166,9 +159,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "backtrace" -version = "0.3.68" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" dependencies = [ "addr2line", "cc", @@ -193,44 +186,24 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.3.3" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" - -[[package]] -name = "blake3" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", - "digest", -] - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "cc" -version = "1.0.79" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] [[package]] name = "cfg-if" @@ -240,24 +213,23 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.26" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", - "time 0.1.45", "wasm-bindgen", - "winapi", + "windows-targets 0.48.5", ] [[package]] name = "chumsky" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" dependencies = [ "hashbrown", "stacker", @@ -279,17 +251,11 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" -[[package]] -name = "constant_time_eq" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" - [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = 
"06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] name = "crossbeam" @@ -307,9 +273,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +checksum = "14c3242926edf34aec4ac3a77108ad4854bffaa2e4ddc1824124ce59231302d5" dependencies = [ "cfg-if", "crossbeam-utils", @@ -317,9 +283,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -328,22 +294,21 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.15" +version = "0.9.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +checksum = "2d2fe95351b870527a5d09bf563ed3c97c0cffb87cf1c78a591bf48bb218d9aa" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", "memoffset 0.9.0", - "scopeguard", ] [[package]] name = "crossbeam-queue" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" +checksum = "b9bcf5bdbfdd6030fb4a1c497b5d5fc5921aa2f60d359a17e249c0e6df3de153" dependencies = [ "cfg-if", "crossbeam-utils", @@ -351,28 +316,18 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.16" +version = "0.8.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +checksum = "c06d96137f14f244c37f989d9fff8f95e6c18b918e71f36638f8c49112e4c78f" dependencies = [ "cfg-if", ] -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "csv" -version = "1.2.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" dependencies = [ "csv-core", "itoa", @@ -382,18 +337,18 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" dependencies = [ "memchr", ] [[package]] name = "cxx" -version = "1.0.102" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f68e12e817cb19eaab81aaec582b4052d07debd3c3c6b083b9d361db47c7dc9d" +checksum = "e9fc0c733f71e58dedf4f034cd2a266f80b94cc9ed512729e1798651b68c2cba" dependencies = [ "cc", "cxxbridge-flags", @@ -403,9 +358,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.102" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e789217e4ab7cf8cc9ce82253180a9fe331f35f5d339f0ccfe0270b39433f397" +checksum = 
"51bc81d2664db24cf1d35405f66e18a85cffd4d49ab930c71a5c6342a410f38c" dependencies = [ "cc", "codespan-reporting", @@ -413,24 +368,24 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] name = "cxxbridge-flags" -version = "1.0.102" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78a19f4c80fd9ab6c882286fa865e92e07688f4387370a209508014ead8751d0" +checksum = "8511afbe34ea242697784da5cb2c5d4a0afb224ca8b136bdf93bfe180cbe5884" [[package]] name = "cxxbridge-macro" -version = "1.0.102" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fcfa71f66c8563c4fa9dd2bb68368d50267856f831ac5d85367e0805f9606c" +checksum = "5c6888cd161769d65134846d4d4981d5a6654307cc46ec83fb917e530aea5f84" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] @@ -478,6 +433,15 @@ dependencies = [ "once_cell", ] +[[package]] +name = "deranged" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eb30d70a07a3b04884d2677f06bec33509dc67ca60d92949e5535352d3191dc" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive_builder" version = "0.11.2" @@ -509,17 +473,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - [[package]] name = "dirs-next" version = "2.0.0" @@ -556,28 +509,17 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] name = "errno" -version = "0.3.2" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys", -] - -[[package]] -name = "errno-dragonfly" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" -dependencies = [ - "cc", "libc", + "windows-sys 0.52.0", ] [[package]] @@ -595,40 +537,31 @@ dependencies = [ "thread_local", ] -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", ] [[package]] name = "gimli" -version = "0.27.3" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ "ahash", + "allocator-api2", ] [[package]] @@ -639,22 +572,22 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" [[package]] name = "iana-time-zone" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows", + "windows-core", ] [[package]] @@ -680,16 +613,7 @@ checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi", "rustix", - "windows-sys", -] - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", + "windows-sys 0.48.0", ] [[package]] @@ -702,16 +626,25 @@ dependencies = [ ] [[package]] -name = "itoa" -version = "1.0.9" +name = "itertools" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "js-sys" -version = "0.3.64" +version = "0.3.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" dependencies = [ "wasm-bindgen", ] @@ -724,9 +657,20 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.147" +version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" + +[[package]] +name = "libredox" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +dependencies = [ + "bitflags 2.4.1", + "libc", + "redox_syscall", +] [[package]] name = "link-cplusplus" @@ -739,21 +683,21 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.5" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" [[package]] name = "log" -version = "0.4.19" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "memchr" -version = "2.5.0" +version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" [[package]] name = "memoffset" @@ -825,37 +769,27 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "object" -version = "0.31.1" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "pin-utils" @@ -864,19 +798,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] -name = "proc-macro2" -version = "1.0.66" +name = "powerfmt" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" dependencies = [ "unicode-ident", ] [[package]] name = "prql-ast" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71194e75f14dbe7debdf2b5eca0812c978021a1bd23d6fe1da98b58e407e035a" +checksum = "d9d91522f9f16d055409b9ffec55693a96e3424fe5d8e7c8331adcf6d7ee363a" dependencies = [ "enum-as-inner", "semver", @@ -886,9 +826,9 @@ dependencies = [ [[package]] name = "prql-compiler" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ff28e838b1be4227cc567a75c11caa3be25c5015f0e5fd21279c06e944ba44f" +checksum = "f4d56865532fcf1abaa31fbb6da6fd9e90edc441c5c78bfe2870ee75187c7a3c" dependencies = [ "anstream", "anyhow", @@ -912,9 +852,9 @@ dependencies = [ [[package]] name = "prql-parser" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3182e2ef0465a960eb02519b18768e39123d3c3a0037a2d2934055a3ef901870" +checksum = "9360352e413390cfd26345f49279622b87581a3b748340d3f42d4d616c2a1ec1" dependencies = [ "chumsky", "itertools 0.11.0", @@ -933,18 
+873,18 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.31" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" dependencies = [ "either", "rayon-core", @@ -952,41 +892,39 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" dependencies = [ - "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "num_cpus", ] [[package]] name = "redox_syscall" -version = "0.2.16" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ "bitflags 1.3.2", ] [[package]] name = "redox_users" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" dependencies = [ "getrandom", - "redox_syscall", + "libredox", "thiserror", ] [[package]] name = "regex" -version = "1.9.1" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick", "memchr", @@ -996,9 +934,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.3" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", @@ -1007,9 +945,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.4" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rustc-demangle" @@ -1019,15 +957,15 @@ checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustix" -version = "0.38.6" +version = "0.38.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ee020b1716f0a80e2ace9b03441a749e402e86712f15f16fe8a8f75afac732f" +checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.1", "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -1038,15 +976,9 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = 
"ryu" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" [[package]] name = "scratch" @@ -1056,38 +988,38 @@ checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152" [[package]] name = "semver" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" +checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" dependencies = [ "serde", ] [[package]] name = "serde" -version = "1.0.174" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b88756493a5bd5e5395d53baa70b194b05764ab85b59e43e4b8f4e1192fa9b1" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.174" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e5c3a298c7f978e53536f95a63bdc4c4a64550582f31a0359a9afda6aede62e" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] name = "serde_json" -version = "1.0.103" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ "itoa", "ryu", @@ -1112,7 +1044,7 @@ dependencies = [ "nix 0.25.1", "rayon", "regex", - "time 0.3.23", + "time", "timer", "tuikit", "unicode-width", @@ -1121,20 +1053,20 @@ dependencies = [ [[package]] name = "sqlformat" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e" +checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c" dependencies = [ - "itertools 0.10.5", + "itertools 0.12.0", "nom", "unicode_categories", ] [[package]] name = "sqlparser" -version = "0.36.1" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" +checksum = "37ae05a8250b968a3f7db93155a84d68b2e6cea1583949af5ca5b5170c76c075" dependencies = [ "log", "serde", @@ -1170,23 +1102,17 @@ dependencies = [ [[package]] name = "strum_macros" -version = "0.25.1" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6069ca09d878a33f883cc06aaa9718ede171841d3832450354410b718b097232" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ "heck", "proc-macro2", "quote", "rustversion", - "syn 2.0.27", + "syn 2.0.41", ] -[[package]] -name = "subtle" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" - [[package]] name = "syn" version = "1.0.109" @@ -1200,9 +1126,9 @@ dependencies = [ 
[[package]] name = "syn" -version = "2.0.27" +version = "2.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0" +checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" dependencies = [ "proc-macro2", "quote", @@ -1222,31 +1148,31 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.2.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449" dependencies = [ "winapi-util", ] [[package]] name = "thiserror" -version = "1.0.44" +version = "1.0.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" +checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.44" +version = "1.0.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" +checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] @@ -1261,30 +1187,21 @@ dependencies = [ [[package]] name = "time" -version = "0.1.45" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - -[[package]] -name = "time" -version = "0.3.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59e399c068f43a5d116fedaf73b203fa4f9c519f17e2b34f63221d3792f81446" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" dependencies = [ + "deranged", + "powerfmt", "serde", "time-core", ] [[package]] name = "time-core" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "timer" @@ -1309,23 +1226,17 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "typenum" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" - [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "unicode_categories" @@ -1366,12 +1277,6 @@ dependencies = [ "quote", ] -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -1380,9 +1285,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -1390,24 +1295,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1415,22 +1320,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" [[package]] name = "winapi" @@ -1450,9 +1355,9 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" dependencies = [ "winapi", ] @@ -1464,12 +1369,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows" -version = "0.48.0" +name = "windows-core" +version = "0.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", ] [[package]] @@ -1478,68 +1383,154 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", ] [[package]] name = "windows-targets" -version = "0.48.1" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", ] [[package]] name = "windows_aarch64_gnullvm" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" [[package]] name = "windows_aarch64_msvc" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" [[package]] name = "windows_i686_gnu" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" [[package]] name = "windows_i686_msvc" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" [[package]] name = "windows_x86_64_gnu" -version = "0.48.0" +version = "0.48.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" [[package]] name = "windows_x86_64_gnullvm" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" [[package]] name = "windows_x86_64_msvc" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" [[package]] name = "yansi" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + +[[package]] +name = "zerocopy" +version = "0.7.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.41", +] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 2a2b582cea8..ac8b31a7290 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,7 +1,6 @@ # workspace is required to vendor crates for all packages. 
[workspace] members = [ - "BLAKE3", "skim", "prql", ] diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index f99ae2a8aea..463be6a3aea 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -82,7 +82,8 @@ enum class AccessType \ M(ALTER_VIEW_REFRESH, "ALTER LIVE VIEW REFRESH, REFRESH VIEW", VIEW, ALTER_VIEW) \ M(ALTER_VIEW_MODIFY_QUERY, "ALTER TABLE MODIFY QUERY", VIEW, ALTER_VIEW) \ - M(ALTER_VIEW, "", GROUP, ALTER) /* allows to execute ALTER VIEW REFRESH, ALTER VIEW MODIFY QUERY; + M(ALTER_VIEW_MODIFY_REFRESH, "ALTER TABLE MODIFY QUERY", VIEW, ALTER_VIEW) \ + M(ALTER_VIEW, "", GROUP, ALTER) /* allows to execute ALTER VIEW REFRESH, ALTER VIEW MODIFY QUERY, ALTER VIEW MODIFY REFRESH; implicitly enabled by the grant ALTER_TABLE */\ \ M(ALTER, "", GROUP, ALL) /* allows to execute ALTER {TABLE|LIVE VIEW} */\ @@ -177,12 +178,14 @@ enum class AccessType M(SYSTEM_MOVES, "SYSTEM STOP MOVES, SYSTEM START MOVES, STOP MOVES, START MOVES", TABLE, SYSTEM) \ M(SYSTEM_PULLING_REPLICATION_LOG, "SYSTEM STOP PULLING REPLICATION LOG, SYSTEM START PULLING REPLICATION LOG", TABLE, SYSTEM) \ M(SYSTEM_CLEANUP, "SYSTEM STOP CLEANUP, SYSTEM START CLEANUP", TABLE, SYSTEM) \ + M(SYSTEM_VIEWS, "SYSTEM REFRESH VIEW, SYSTEM START VIEWS, SYSTEM STOP VIEWS, SYSTEM START VIEW, SYSTEM STOP VIEW, SYSTEM CANCEL VIEW, REFRESH VIEW, START VIEWS, STOP VIEWS, START VIEW, STOP VIEW, CANCEL VIEW", VIEW, SYSTEM) \ M(SYSTEM_DISTRIBUTED_SENDS, "SYSTEM STOP DISTRIBUTED SENDS, SYSTEM START DISTRIBUTED SENDS, STOP DISTRIBUTED SENDS, START DISTRIBUTED SENDS", TABLE, SYSTEM_SENDS) \ M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP REPLICATED SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \ M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \ M(SYSTEM_REPLICATION_QUEUES, "SYSTEM STOP REPLICATION QUEUES, SYSTEM START REPLICATION QUEUES, STOP REPLICATION QUEUES, START REPLICATION QUEUES", TABLE, SYSTEM) \ M(SYSTEM_DROP_REPLICA, "DROP REPLICA", TABLE, SYSTEM) \ M(SYSTEM_SYNC_REPLICA, "SYNC REPLICA", TABLE, SYSTEM) \ + M(SYSTEM_REPLICA_READINESS, "SYSTEM REPLICA READY, SYSTEM REPLICA UNREADY", GLOBAL, SYSTEM) \ M(SYSTEM_RESTART_REPLICA, "RESTART REPLICA", TABLE, SYSTEM) \ M(SYSTEM_RESTORE_REPLICA, "RESTORE REPLICA", TABLE, SYSTEM) \ M(SYSTEM_WAIT_LOADING_PARTS, "WAIT LOADING PARTS", TABLE, SYSTEM) \ diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 90fddd0085d..567b131c00e 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -155,6 +155,7 @@ namespace "formats", "privileges", "data_type_families", + "database_engines", "table_engines", "table_functions", "aggregate_function_combinators", diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index b5a15513a89..a7594503992 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -51,7 +51,7 @@ TEST(AccessRights, Union) "CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, " "TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, " "SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " - "SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " + "SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, 
" "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, " "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1"); } diff --git a/src/AggregateFunctions/AggregateFunctionAny.cpp b/src/AggregateFunctions/AggregateFunctionAny.cpp index 15681eca817..a6010ff07c3 100644 --- a/src/AggregateFunctions/AggregateFunctionAny.cpp +++ b/src/AggregateFunctions/AggregateFunctionAny.cpp @@ -110,7 +110,7 @@ public: } } } - else + else if (row_begin < row_end) { size_t pos = First ? row_begin : row_end - 1; add(place, columns, pos, arena); diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index 5c101888140..b6ba562045d 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -51,10 +51,10 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat void AggregateFunctionFactory::registerNullsActionTransformation(const String & source_ignores_nulls, const String & target_respect_nulls) { if (!aggregate_functions.contains(source_ignores_nulls)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Source aggregation '{}' not found"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Source aggregation '{}' not found", source_ignores_nulls); if (!aggregate_functions.contains(target_respect_nulls)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Target aggregation '{}' not found"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Target aggregation '{}' not found", target_respect_nulls); if (!respect_nulls.emplace(source_ignores_nulls, target_respect_nulls).second) throw Exception( diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index b95471df90a..6c6397e35d5 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -20,6 +20,7 @@ #include #include +#include #include diff --git a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp deleted file mode 100644 index debc9b6d565..00000000000 --- a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -struct Settings; - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; -} - -namespace -{ - -template