diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index ee14b3f0434..731119a9957 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,7 +1,7 @@ ### Changelog category (leave one): - New Feature - Improvement -- Bug Fix (user-visible misbehaviour in official stable or prestable release) +- Bug Fix (user-visible misbehavior in official stable or prestable release) - Performance Improvement - Backward Incompatible Change - Build/Testing/Packaging Improvement diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index f4cbcbf1790..9a96ccc9906 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -362,50 +362,50 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - BuilderBinGCC: - needs: [DockerHubPush] - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/build_check - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) - BUILD_NAME=binary_gcc - EOF - - name: Download changed images - uses: actions/download-artifact@v2 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Build - run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - - name: Upload build URLs to artifacts - if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 - with: - name: ${{ env.BUILD_URLS }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - - name: Cleanup - if: always() - run: | - docker kill "$(docker ps -q)" ||: - docker rm -f "$(docker ps -a -q)" ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + # BuilderBinGCC: + # needs: [DockerHubPush] + # runs-on: [self-hosted, builder] + # steps: + # - name: Set envs + # run: | + # cat >> "$GITHUB_ENV" << 'EOF' + # TEMP_PATH=${{runner.temp}}/build_check + # IMAGES_PATH=${{runner.temp}}/images_path + # REPO_COPY=${{runner.temp}}/build_check/ClickHouse + # CACHES_PATH=${{runner.temp}}/../ccaches + # CHECK_NAME=ClickHouse build check (actions) + # BUILD_NAME=binary_gcc + # EOF + # - name: Download changed images + # uses: actions/download-artifact@v2 + # with: + # name: changed_images + # path: ${{ env.IMAGES_PATH }} + # - name: Clear repository + # run: | + # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + # - name: Check out repository code + # uses: actions/checkout@v2 + # - name: Build + # run: | + # git -C "$GITHUB_WORKSPACE" submodule sync --recursive + # git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + # sudo rm -fr "$TEMP_PATH" + # mkdir -p "$TEMP_PATH" + # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + # cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + # - name: Upload build URLs to artifacts + # if: ${{ success() || failure() }} + # uses: actions/upload-artifact@v2 + # with: + # name: ${{ env.BUILD_URLS }} + # path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + # - name: Cleanup + # if: always() + # run: | + # docker kill "$(docker ps -q)" ||: + # docker rm -f "$(docker ps -a -q)" ||: + # sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebAsan: needs: [DockerHubPush] runs-on: [self-hosted, builder] @@ -1030,7 +1030,7 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - - BuilderBinGCC + # - BuilderBinGCC - BuilderBinPPC64 - BuilderBinTidy - BuilderDebSplitted @@ -2685,40 +2685,40 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" - UnitTestsReleaseGCC: - needs: [BuilderBinGCC] - runs-on: [self-hosted, fuzzer-unit-tester] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/unit_tests_asan - REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Unit tests (release-gcc, actions) - REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse - EOF - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Unit test - run: | - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" - python3 unit_tests_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill "$(docker ps -q)" ||: - docker rm -f "$(docker ps -a -q)" ||: - sudo rm -fr "$TEMP_PATH" + # UnitTestsReleaseGCC: + # needs: [BuilderBinGCC] + # runs-on: [self-hosted, fuzzer-unit-tester] + # steps: + # - name: Set envs + # run: | + # cat >> "$GITHUB_ENV" << 'EOF' + # TEMP_PATH=${{runner.temp}}/unit_tests_asan + # REPORTS_PATH=${{runner.temp}}/reports_dir + # CHECK_NAME=Unit tests (release-gcc, actions) + # REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse + # EOF + # - name: Download json reports + # uses: actions/download-artifact@v2 + # with: + # path: ${{ env.REPORTS_PATH }} + # - name: Clear repository + # run: | + # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + # - name: Check out repository code + # uses: actions/checkout@v2 + # - name: Unit test + # run: | + # sudo rm -fr "$TEMP_PATH" + # mkdir -p "$TEMP_PATH" + # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + # cd "$REPO_COPY/tests/ci" + # python3 unit_tests_check.py "$CHECK_NAME" + # - name: Cleanup + # if: always() + # run: | + # docker kill "$(docker ps -q)" ||: + # docker rm -f "$(docker ps -a -q)" ||: + # sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index d93b17cbe79..c1ae4798716 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -375,50 +375,50 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - BuilderBinGCC: - needs: [DockerHubPush, FastTest] - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/build_check - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) - BUILD_NAME=binary_gcc - EOF - - name: Download changed images - uses: actions/download-artifact@v2 - with: - name: changed_images - path: ${{ runner.temp }}/images_path - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Build - run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - - name: Upload build URLs to artifacts - if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 - with: - name: ${{ env.BUILD_URLS }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - - name: Cleanup - if: always() - run: | - docker kill "$(docker ps -q)" ||: - docker rm -f "$(docker ps -a -q)" ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + # BuilderBinGCC: + # needs: [DockerHubPush, FastTest] + # runs-on: [self-hosted, builder] + # steps: + # - name: Set envs + # run: | + # cat >> "$GITHUB_ENV" << 'EOF' + # TEMP_PATH=${{runner.temp}}/build_check + # IMAGES_PATH=${{runner.temp}}/images_path + # REPO_COPY=${{runner.temp}}/build_check/ClickHouse + # CACHES_PATH=${{runner.temp}}/../ccaches + # CHECK_NAME=ClickHouse build check (actions) + # BUILD_NAME=binary_gcc + # EOF + # - name: Download changed images + # uses: actions/download-artifact@v2 + # with: + # name: changed_images + # path: ${{ runner.temp }}/images_path + # - name: Clear repository + # run: | + # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + # - name: Check out repository code + # uses: actions/checkout@v2 + # - name: Build + # run: | + # git -C "$GITHUB_WORKSPACE" submodule sync --recursive + # git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + # sudo rm -fr "$TEMP_PATH" + # mkdir -p "$TEMP_PATH" + # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + # cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + # - name: Upload build URLs to artifacts + # if: ${{ success() || failure() }} + # uses: actions/upload-artifact@v2 + # with: + # name: ${{ env.BUILD_URLS }} + # path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + # - name: Cleanup + # if: always() + # run: | + # docker kill "$(docker ps -q)" ||: + # docker rm -f "$(docker ps -a -q)" ||: + # sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebAarch64: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -1077,7 +1077,7 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - - BuilderBinGCC + # - BuilderBinGCC - BuilderBinPPC64 - BuilderBinTidy - BuilderDebSplitted @@ -2886,40 +2886,40 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" - UnitTestsReleaseGCC: - needs: [BuilderBinGCC] - runs-on: [self-hosted, fuzzer-unit-tester] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/unit_tests_asan - REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Unit tests (release-gcc, actions) - REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse - EOF - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Unit test - run: | - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" - python3 unit_tests_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill "$(docker ps -q)" ||: - docker rm -f "$(docker ps -a -q)" ||: - sudo rm -fr "$TEMP_PATH" + # UnitTestsReleaseGCC: + # needs: [BuilderBinGCC] + # runs-on: [self-hosted, fuzzer-unit-tester] + # steps: + # - name: Set envs + # run: | + # cat >> "$GITHUB_ENV" << 'EOF' + # TEMP_PATH=${{runner.temp}}/unit_tests_asan + # REPORTS_PATH=${{runner.temp}}/reports_dir + # CHECK_NAME=Unit tests (release-gcc, actions) + # REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse + # EOF + # - name: Download json reports + # uses: actions/download-artifact@v2 + # with: + # path: ${{ env.REPORTS_PATH }} + # - name: Clear repository + # run: | + # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + # - name: Check out repository code + # uses: actions/checkout@v2 + # - name: Unit test + # run: | + # sudo rm -fr "$TEMP_PATH" + # mkdir -p "$TEMP_PATH" + # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + # cd "$REPO_COPY/tests/ci" + # python3 unit_tests_check.py "$CHECK_NAME" + # - name: Cleanup + # if: always() + # run: | + # docker kill "$(docker ps -q)" ||: + # docker rm -f "$(docker ps -a -q)" ||: + # sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml index 7a7eddf444d..9c55c619039 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -3,9 +3,14 @@ name: TagsStableWorkflow # - Sends it to JFROG Artifactory # - Adds them to the release assets +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + on: # yamllint disable-line rule:truthy push: tags: + - 'v*-prestable' - 'v*-stable' - 'v*-lts' @@ -15,26 +20,36 @@ jobs: runs-on: [self-hosted, style-checker] steps: - name: Get tag name - run: echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" + run: | + echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" - name: Check out repository code uses: actions/checkout@v2 with: ref: master + fetch-depth: 0 - name: Generate versions + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - git fetch --tags ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv + GID=$(id -d "${UID}") + docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 \ + --volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \ + /ClickHouse/utils/changelog/changelog.py -vv --gh-user-or-token="$GITHUB_TOKEN" \ + --output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" --jobs=5 "${GITHUB_TAG}" + git add "./docs/changelogs/${GITHUB_TAG}.md" + git diff HEAD - name: Create Pull Request uses: peter-evans/create-pull-request@v3 with: author: "robot-clickhouse " committer: "robot-clickhouse " - commit-message: Update version_date.tsv after ${{ env.GITHUB_TAG }} + commit-message: Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }} branch: auto/${{ env.GITHUB_TAG }} delete-branch: true - title: Update version_date.tsv after ${{ env.GITHUB_TAG }} + title: Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }} body: | - Update version_date.tsv after ${{ env.GITHUB_TAG }} + Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }} - Changelog category (leave one): + ### Changelog category (leave one): - Not for changelog (changelog entry is not required) diff --git a/.gitmodules b/.gitmodules index 6c9e66f9cbc..5fd9e9721f6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -262,3 +262,6 @@ [submodule "contrib/minizip-ng"] path = contrib/minizip-ng url = https://github.com/zlib-ng/minizip-ng +[submodule "contrib/wyhash"] + path = contrib/wyhash + url = https://github.com/wangyi-fudan/wyhash.git diff --git a/benchmark/clickhouse/benchmark_cloud.sh b/benchmark/clickhouse/benchmark_cloud.sh new file mode 100755 index 00000000000..01376e4009e --- /dev/null +++ b/benchmark/clickhouse/benchmark_cloud.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +QUERIES_FILE="queries.sql" +TABLE=$1 +TRIES=3 + +PARAMS="--host ... --secure --password ..." + +if [ -x ./clickhouse ] +then + CLICKHOUSE_CLIENT="./clickhouse client" +elif command -v clickhouse-client >/dev/null 2>&1 +then + CLICKHOUSE_CLIENT="clickhouse-client" +else + echo "clickhouse-client is not found" + exit 1 +fi + +QUERY_ID_PREFIX="benchmark_$RANDOM" +QUERY_NUM=1 + +cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query + do + for i in $(seq 1 $TRIES) + do + QUERY_ID="${QUERY_ID_PREFIX}_${QUERY_NUM}_${i}" + ${CLICKHOUSE_CLIENT} ${PARAMS} --query_id "${QUERY_ID}" --format=Null --max_memory_usage=100G --query="$query" + echo -n '.' + done + QUERY_NUM=$((QUERY_NUM + 1)) + echo +done + +sleep 10 + +${CLICKHOUSE_CLIENT} ${PARAMS} --query " + WITH extractGroups(query_id, '(\d+)_(\d+)\$') AS num_run, num_run[1]::UInt8 AS num, num_run[2]::UInt8 AS run + SELECT groupArrayInsertAt(query_duration_ms / 1000, (run - 1)::UInt8)::String || ',' + FROM clusterAllReplicas(default, system.query_log) + WHERE event_date >= yesterday() AND type = 2 AND query_id LIKE '${QUERY_ID_PREFIX}%' + GROUP BY num ORDER BY num FORMAT TSV +" diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1f03c0fd341..94003f3b3ee 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -94,6 +94,7 @@ if (ENABLE_FUZZING) add_contrib (libprotobuf-mutator-cmake libprotobuf-mutator) endif() +add_contrib (wyhash-cmake wyhash) add_contrib (cityhash102) add_contrib (libfarmhash) add_contrib (icu-cmake icu) diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 2e4059efc17..a4574493440 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -364,10 +364,8 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_snappy.cc" ${ARROW_SRCS}) add_definitions(-DARROW_WITH_ZLIB) SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS}) -if (ARROW_WITH_ZSTD) - add_definitions(-DARROW_WITH_ZSTD) - SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS}) -endif () +add_definitions(-DARROW_WITH_ZSTD) +SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS}) add_library(_arrow ${ARROW_SRCS}) @@ -383,7 +381,6 @@ target_link_libraries(_arrow PRIVATE ch_contrib::snappy ch_contrib::zlib ch_contrib::zstd - ch_contrib::zstd ) target_link_libraries(_arrow PUBLIC _orc) diff --git a/contrib/wyhash b/contrib/wyhash new file mode 160000 index 00000000000..991aa3dab62 --- /dev/null +++ b/contrib/wyhash @@ -0,0 +1 @@ +Subproject commit 991aa3dab624e50b066f7a02ccc9f6935cc740ec diff --git a/contrib/wyhash-cmake/CMakeLists.txt b/contrib/wyhash-cmake/CMakeLists.txt new file mode 100644 index 00000000000..679346d601a --- /dev/null +++ b/contrib/wyhash-cmake/CMakeLists.txt @@ -0,0 +1,3 @@ +add_library(wyhash INTERFACE) +target_include_directories(wyhash SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/wyhash") +add_library(ch_contrib::wyhash ALIAS wyhash) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index d9a5bb23a80..c547ae03a52 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -176,6 +176,7 @@ function clone_submodules contrib/NuRaft contrib/jemalloc contrib/replxx + contrib/wyhash ) git submodule sync diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index 34414abc3f5..0cb25d12a9f 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -28,8 +28,11 @@ done set -e # cleanup for retry run if volume is not recreated -docker kill "$(docker ps -aq)" || true -docker rm "$(docker ps -aq)" || true +# shellcheck disable=SC2046 +{ + docker kill $(docker ps -aq) || true + docker rm $(docker ps -aq) || true +} echo "Start tests" export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 0ba4f0edc21..97b9225a2d2 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -101,7 +101,12 @@ EOL function stop() { - clickhouse stop + clickhouse stop --do-not-kill && return + # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. + kill -TERM "$(pidof gdb)" ||: + sleep 5 + gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" ||: + clickhouse stop --force } function start() @@ -198,10 +203,14 @@ clickhouse-client --query "SHOW TABLES FROM test" stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log +# NOTE Disable thread fuzzer before server start with data after stress test. +# In debug build it can take a lot of time. +unset "${!THREAD_@}" + start clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \ - || (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + || (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \ && grep -Fa ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt) [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" @@ -387,7 +396,7 @@ for table in query_log trace_log; do done # Write check result into check_status.tsv -clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%') LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv +clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv # Core dumps (see gcore) diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 3101ab84c40..427010dbe37 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -13,6 +13,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ libxml2-utils \ moreutils \ pylint \ + python3-fuzzywuzzy \ python3-pip \ shellcheck \ yamllint \ diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index 651883511e8..67870eb3234 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -18,3 +18,6 @@ echo "Check workflows" | ts echo "Check shell scripts with shellcheck" | ts ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +echo "Check help for changelog generator works" | ts +cd ../changelog || exit 1 +./changelog.py -h 2>/dev/null 1>&2 diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index dadda55c830..687c03697d7 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -12,7 +12,7 @@ UNKNOWN_SIGN = "[ UNKNOWN " SKIPPED_SIGN = "[ SKIPPED " HUNG_SIGN = "Found hung queries in processlist" -NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"] +SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"] RETRIES_SIGN = "Some tests were restarted" @@ -25,14 +25,14 @@ def process_test_log(log_path): success = 0 hung = False retries = False - task_timeout = True + success_finish = False test_results = [] with open(log_path, "r") as test_file: for line in test_file: original_line = line line = line.strip() - if any(s in line for s in NO_TASK_TIMEOUT_SIGNS): - task_timeout = False + if any(s in line for s in SUCCESS_FINISH_SIGNS): + success_finish = True if HUNG_SIGN in line: hung = True if RETRIES_SIGN in line: @@ -81,7 +81,7 @@ def process_test_log(log_path): failed, success, hung, - task_timeout, + success_finish, retries, test_results, ) @@ -108,7 +108,7 @@ def process_result(result_path): failed, success, hung, - task_timeout, + success_finish, retries, test_results, ) = process_test_log(result_path) @@ -123,10 +123,10 @@ def process_result(result_path): description = "Some queries hung, " state = "failure" test_results.append(("Some queries hung", "FAIL", "0", "")) - elif task_timeout: - description = "Timeout, " + elif not success_finish: + description = "Tests are not finished, " state = "failure" - test_results.append(("Timeout", "FAIL", "0", "")) + test_results.append(("Tests are not finished", "FAIL", "0", "")) elif retries: description = "Some tests restarted, " test_results.append(("Some tests restarted", "SKIPPED", "0", "")) diff --git a/docs/changelogs/v22.1.1.2542-prestable.md b/docs/changelogs/v22.1.1.2542-prestable.md new file mode 100644 index 00000000000..b552da5cfb8 --- /dev/null +++ b/docs/changelogs/v22.1.1.2542-prestable.md @@ -0,0 +1,217 @@ +### ClickHouse release v22.1.1.2542-prestable FIXME as compared to v21.12.1.9017-prestable + +#### Backward Incompatible Change +* Change ZooKeeper path for zero-copy marks for shared data. Fix for remove marks in ZooKeeper for renamed parts. [#32061](https://github.com/ClickHouse/ClickHouse/pull/32061) ([ianton-ru](https://github.com/ianton-ru)). +* - Account for scalar subqueries. With this change, rows read in scalar subqueries are now reported in the query_log. If the scalar subquery is cached (repeated or called for several rows) the rows read are only counted once. This change allows KILLing queries and reporting progress while they are executing scalar subqueries. [#32271](https://github.com/ClickHouse/ClickHouse/pull/32271) ([Raúl Marín](https://github.com/Algunenano)). +* Add `left`, `right`, `leftUTF8`, `rightUTF8` functions. Fix error in implementation of `substringUTF8` function with negative offset (offset from the end of string). The functions `left` and `right` were previously implemented in parser. Upgrade notes: distributed queries with `left` or `right` functions without aliases may throw exception if cluster contains different versions of clickhouse-server. If you are upgrading your cluster and encounter this error, you should finish upgrading your cluster to ensure all nodes have the same version. Also you can add aliases (`AS something`) to the columns in your queries to avoid this issue. [#33407](https://github.com/ClickHouse/ClickHouse/pull/33407) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Implemented sparse serialization. It can reduce usage of disk space and improve performance of some queries for columns, which contain a lot of default (zero) values. It can be enabled by setting `ratio_for_sparse_serialization`. Sparse serialization will be chosen dynamically for column, if it has ratio of number of default values to number of all values above that threshold. Serialization (default or sparse) will be fixed for every column in part, but may varies between parts. [#22535](https://github.com/ClickHouse/ClickHouse/pull/22535) ([Anton Popov](https://github.com/CurtizJ)). +* add grouping sets function, like GROUP BY grouping sets (a, b, (a, b)). [#26869](https://github.com/ClickHouse/ClickHouse/pull/26869) ([taylor12805](https://github.com/taylor12805)). +* Added an ability to read from all replicas within a shard during distributed query. To enable this, set `allow_experimental_parallel_reading_from_replicas=true` and `max_parallel_replicas` to any number. This closes [#26748](https://github.com/ClickHouse/ClickHouse/issues/26748). [#29279](https://github.com/ClickHouse/ClickHouse/pull/29279) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Start and stop servers when hosts and ports configuration changes. [#30549](https://github.com/ClickHouse/ClickHouse/pull/30549) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Create any kind of view with comment. ... [#31062](https://github.com/ClickHouse/ClickHouse/pull/31062) ([Vasily Nemkov](https://github.com/Enmk)). +* Implement hive table engine to access apache hive from clickhouse. Related RFC: [#29245](https://github.com/ClickHouse/ClickHouse/issues/29245). [#31104](https://github.com/ClickHouse/ClickHouse/pull/31104) ([李扬](https://github.com/taiyang-li)). +* * Automatic cluster discovery via Zoo/Keeper. [#31442](https://github.com/ClickHouse/ClickHouse/pull/31442) ([Vladimir C](https://github.com/vdimir)). +* Adding support for disks backed by Azure Blob Storage, in a similar way it has been done for disks backed by AWS S3. Current implementation allows for all the basic disk operations. [#31505](https://github.com/ClickHouse/ClickHouse/pull/31505) ([Jakub Kuklis](https://github.com/jkuklis)). +* * Add "TABLE OVERRIDE" feature for customizing MaterializedMySQL table schemas. [#32325](https://github.com/ClickHouse/ClickHouse/pull/32325) ([Stig Bakken](https://github.com/stigsb)). +* Implement data schema inference for input formats. Allow to skip structure (or write just `auto`) in table functions `file`, `url`, `s3`, `hdfs` and in parameters of `clickhouse-local` . Allow to skip structure in create query for table engines `File`, `HDFS`, `S3`, `URL`, `Merge`, `Buffer`, `Distributed` and `ReplicatedMergeTree` (if we add new replicas). [#32455](https://github.com/ClickHouse/ClickHouse/pull/32455) ([Kruglov Pavel](https://github.com/Avogar)). +* Support TABLE OVERRIDE clause for MaterializedPostgreSQL. RFC: [#31480](https://github.com/ClickHouse/ClickHouse/issues/31480). [#32749](https://github.com/ClickHouse/ClickHouse/pull/32749) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add `EXPLAIN TABLE OVERRIDE` query. [#32836](https://github.com/ClickHouse/ClickHouse/pull/32836) ([Stig Bakken](https://github.com/stigsb)). +* If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)). +* Add aggregate functions `cramersV`, `cramersVBiasCorrected`, `theilsU` and `contingency`. These functions calculate dependency (measure of association) between categorial values. All these functions are using cross-tab (histogram on pairs) for implementation. You can imagine it like a correlation coefficient but for any discrete values (not necessary numbers). [#33366](https://github.com/ClickHouse/ClickHouse/pull/33366) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added function `arrayLast`. Closes [#33390](https://github.com/ClickHouse/ClickHouse/issues/33390). [#33415](https://github.com/ClickHouse/ClickHouse/pull/33415) ([Maksim Kita](https://github.com/kitaisreal)). +* Add MONTHNAME function. [#33436](https://github.com/ClickHouse/ClickHouse/pull/33436) ([usurai](https://github.com/usurai)). +* Auto detect file extension. Close [#30918](https://github.com/ClickHouse/ClickHouse/issues/30918). [#33443](https://github.com/ClickHouse/ClickHouse/pull/33443) ([zhongyuankai](https://github.com/zhongyuankai)). +* Added function `arrayLastIndex`. [#33465](https://github.com/ClickHouse/ClickHouse/pull/33465) ([Maksim Kita](https://github.com/kitaisreal)). +* Add new h3 miscellaneous functions: `h3DegsToRads`, `h3RadsToDegs`, `h3HexAreaKm2`, `h3CellAreaM2`, `h3CellAreaRads2`. [#33479](https://github.com/ClickHouse/ClickHouse/pull/33479) ([Bharat Nallan](https://github.com/bharatnc)). +* Detect format by file extension in file/hdfs/s3/url table functions and HDFS/S3/URL table engines. [#33565](https://github.com/ClickHouse/ClickHouse/pull/33565) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Performance Improvement +* Non significant change. In extremely rare cases when data part is lost on every replica, after merging of some data parts, the subsequent queries may skip less amount of partitions during partition pruning. This hardly affects anything. [#32220](https://github.com/ClickHouse/ClickHouse/pull/32220) ([Azat Khuzhin](https://github.com/azat)). +* Slight performance improvement of `reinterpret` function. [#32587](https://github.com/ClickHouse/ClickHouse/pull/32587) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Reduce allocated memory for dictionaries with string attributes. [#33466](https://github.com/ClickHouse/ClickHouse/pull/33466) ([Maksim Kita](https://github.com/kitaisreal)). +* Avoid exponential backtracking in parser. This closes [#20158](https://github.com/ClickHouse/ClickHouse/issues/20158). [#33481](https://github.com/ClickHouse/ClickHouse/pull/33481) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Now date time conversion functions that generates time before 1970-01-01 00:00:00 will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). +* Added settings `command_read_timeout`, `command_write_timeout` for `StorageExecutable`, `StorageExecutablePool`, `ExecutableDictionary`, `ExecutablePoolDictionary`, `ExecutableUserDefinedFunctions`. Setting `command_read_timeout` controls timeout for reading data from command stdout in milliseconds. Setting `command_write_timeout` timeout for writing data to command stdin in milliseconds. Added settings `command_termination_timeout` for `ExecutableUserDefinedFunction`, `ExecutableDictionary`, `StorageExecutable`. Added setting `execute_direct` for `ExecutableUserDefinedFunction`, by default true. Added setting `execute_direct` for `ExecutableDictionary`, `ExecutablePoolDictionary`, by default false. [#30957](https://github.com/ClickHouse/ClickHouse/pull/30957) ([Maksim Kita](https://github.com/kitaisreal)). +* Optimize single part projection materialization. This closes [#31669](https://github.com/ClickHouse/ClickHouse/issues/31669). [#31885](https://github.com/ClickHouse/ClickHouse/pull/31885) ([Amos Bird](https://github.com/amosbird)). +* Enable comparison between `Decimal` and `Float`. Closes [#22626](https://github.com/ClickHouse/ClickHouse/issues/22626). [#31966](https://github.com/ClickHouse/ClickHouse/pull/31966) ([flynn](https://github.com/ucasfl)). +* - Ignore parse failure of opentelemetry's `traceparent` header. [#32116](https://github.com/ClickHouse/ClickHouse/pull/32116) ([Frank Chen](https://github.com/FrankChen021)). +* Improve keeper writing performance by optimization the size calculation logic. [#32366](https://github.com/ClickHouse/ClickHouse/pull/32366) ([zhanglistar](https://github.com/zhanglistar)). +* Allows to connect to mongodb 5.0. Closes [#31483](https://github.com/ClickHouse/ClickHouse/issues/31483),. [#32416](https://github.com/ClickHouse/ClickHouse/pull/32416) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable some optimizations for window functions. Closes [#31535](https://github.com/ClickHouse/ClickHouse/issues/31535). Closes [#31620](https://github.com/ClickHouse/ClickHouse/issues/31620). [#32453](https://github.com/ClickHouse/ClickHouse/pull/32453) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow empty credentials for mongo engine. Closes [#26267](https://github.com/ClickHouse/ClickHouse/issues/26267). [#32460](https://github.com/ClickHouse/ClickHouse/pull/32460) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Improve handling nested structures with missing columns while reading protobuf. Follow-up to https://github.com/ClickHouse/ClickHouse/pull/31988. [#32531](https://github.com/ClickHouse/ClickHouse/pull/32531) ([Vitaly Baranov](https://github.com/vitlibar)). +* Events clause support for window view watch query. [#32607](https://github.com/ClickHouse/ClickHouse/pull/32607) ([vxider](https://github.com/Vxider)). +* Add settings `max_concurrent_select_queries` and `max_concurrent_insert_queries` for control concurrent queries by query kind. Close [#3575](https://github.com/ClickHouse/ClickHouse/issues/3575). [#32609](https://github.com/ClickHouse/ClickHouse/pull/32609) ([SuperDJY](https://github.com/cmsxbc)). +* support Date32 for `genarateRandom` engine. [#32643](https://github.com/ClickHouse/ClickHouse/pull/32643) ([nauta](https://github.com/nautaa)). +* Support authSource option for storage MongoDB. Closes [#32594](https://github.com/ClickHouse/ClickHouse/issues/32594). [#32702](https://github.com/ClickHouse/ClickHouse/pull/32702) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to control connection timeouts for mysql (previously was supported only for dictionary source). Closes [#16669](https://github.com/ClickHouse/ClickHouse/issues/16669). Previously default connect_timeout was rather small, now it is configurable. [#32734](https://github.com/ClickHouse/ClickHouse/pull/32734) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Flush all In-Memory data parts when WAL is not enabled while shutdown server or detaching table. [#32742](https://github.com/ClickHouse/ClickHouse/pull/32742) ([nauta](https://github.com/nautaa)). +* Improve gRPC compression support for [#28671](https://github.com/ClickHouse/ClickHouse/issues/28671). [#32747](https://github.com/ClickHouse/ClickHouse/pull/32747) ([Vitaly Baranov](https://github.com/vitlibar)). +* Added support for specifying subquery as SQL user defined function. Example: `CREATE FUNCTION test AS () -> (SELECT 1)`. Closes [#30755](https://github.com/ClickHouse/ClickHouse/issues/30755). [#32758](https://github.com/ClickHouse/ClickHouse/pull/32758) ([Maksim Kita](https://github.com/kitaisreal)). +* Support hints for clickhouse-client and clickhouse-local. Closes [#32237](https://github.com/ClickHouse/ClickHouse/issues/32237),. [#32841](https://github.com/ClickHouse/ClickHouse/pull/32841) ([凌涛](https://github.com/lingtaolf)). +* - Do not prepend THREADS_COUNT with -j to avoid additional prepending in subprocesses. [#32844](https://github.com/ClickHouse/ClickHouse/pull/32844) ([Ernest Zaslavsky](https://github.com/kreuzerkrieg)). +* Added support for `BIT` data type in `MaterializedMySQL`. Closes [#15182](https://github.com/ClickHouse/ClickHouse/issues/15182), [#32233](https://github.com/ClickHouse/ClickHouse/issues/32233). [#32900](https://github.com/ClickHouse/ClickHouse/pull/32900) ([zzsmdfj](https://github.com/zzsmdfj)). +* More efficient handling of globs for url storage. Closes [#32866](https://github.com/ClickHouse/ClickHouse/issues/32866). [#32907](https://github.com/ClickHouse/ClickHouse/pull/32907) ([Kseniia Sumarokova](https://github.com/kssenii)). +* This only happens in unofficial builds. Fixed segfault when inserting data into compressed Decimal, String, FixedString and Array columns. This closes [#32939](https://github.com/ClickHouse/ClickHouse/issues/32939). [#32940](https://github.com/ClickHouse/ClickHouse/pull/32940) ([N. Kolotov](https://github.com/nkolotov)). +* Dictionaries added `Date32` date type support. Closes [#32913](https://github.com/ClickHouse/ClickHouse/issues/32913). [#32971](https://github.com/ClickHouse/ClickHouse/pull/32971) ([Maksim Kita](https://github.com/kitaisreal)). +* Short circuit evaluation function `throwIf` support. Closes [#32969](https://github.com/ClickHouse/ClickHouse/issues/32969). [#32973](https://github.com/ClickHouse/ClickHouse/pull/32973) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve Bool type serialization and deserialization. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)). +* Send profile info in clickhouse-local. Closes [#33093](https://github.com/ClickHouse/ClickHouse/issues/33093). [#33097](https://github.com/ClickHouse/ClickHouse/pull/33097) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Inject git information into clickhouse binary file. So we can get source code revision easily from clickhouse binary file. [#33124](https://github.com/ClickHouse/ClickHouse/pull/33124) ([李扬](https://github.com/taiyang-li)). +* Validate config keys for external dictionaries. [#33095](https://github.com/ClickHouse/ClickHouse/issues/33095)#issuecomment-1000577517. [#33130](https://github.com/ClickHouse/ClickHouse/pull/33130) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add retry for Postgres connect in case nothing has been fetched yet. Closes [#33199](https://github.com/ClickHouse/ClickHouse/issues/33199). [#33209](https://github.com/ClickHouse/ClickHouse/pull/33209) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly prevent nullable primary keys if necessary. This is for [#32780](https://github.com/ClickHouse/ClickHouse/issues/32780). [#33218](https://github.com/ClickHouse/ClickHouse/pull/33218) ([Amos Bird](https://github.com/amosbird)). +* If storage supports SETTINGS allow to pass them as key value or via config. Add this support for mysql. [#33231](https://github.com/ClickHouse/ClickHouse/pull/33231) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to cancel formats Arrow / Parquet / ORC which failed to be cancelled it case of big files and setting input_format_allow_seeks as false. Closes [#29678](https://github.com/ClickHouse/ClickHouse/issues/29678). [#33238](https://github.com/ClickHouse/ClickHouse/pull/33238) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* parseDateTimeBestEffort support Unix Timestamp with Milliseconds. [#33276](https://github.com/ClickHouse/ClickHouse/pull/33276) ([Ben](https://github.com/benbiti)). +* Support moving conditions to `PREWHERE` (setting `optimize_move_to_prewhere`) for tables of `Merge` engine if its all underlying tables supports `PREWHERE`. [#33300](https://github.com/ClickHouse/ClickHouse/pull/33300) ([Anton Popov](https://github.com/CurtizJ)). +* Pressing Ctrl+C twice will terminate `clickhouse-benchmark` immediately without waiting for in-flight queries. This closes [#32586](https://github.com/ClickHouse/ClickHouse/issues/32586). [#33303](https://github.com/ClickHouse/ClickHouse/pull/33303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* support `` in cluster configuration. Close [#33270](https://github.com/ClickHouse/ClickHouse/issues/33270). [#33330](https://github.com/ClickHouse/ClickHouse/pull/33330) ([SuperDJY](https://github.com/cmsxbc)). +* `LineAsString` can be used as output format. This closes [#30919](https://github.com/ClickHouse/ClickHouse/issues/30919). [#33331](https://github.com/ClickHouse/ClickHouse/pull/33331) ([Sergei Trifonov](https://github.com/serxa)). +* Allow negative intervals in function `intervalLengthSum`. Their length will be added as well. This closes [#33323](https://github.com/ClickHouse/ClickHouse/issues/33323). [#33335](https://github.com/ClickHouse/ClickHouse/pull/33335) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* clickhouse-local: track memory under --max_memory_usage_in_client option. [#33341](https://github.com/ClickHouse/ClickHouse/pull/33341) ([Azat Khuzhin](https://github.com/azat)). +* Make installation script working on FreeBSD. This closes [#33384](https://github.com/ClickHouse/ClickHouse/issues/33384). [#33418](https://github.com/ClickHouse/ClickHouse/pull/33418) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add some building options in system.build_options. [#33431](https://github.com/ClickHouse/ClickHouse/pull/33431) ([李扬](https://github.com/taiyang-li)). +* Abuse of `untuple` function was leading to exponential complexity of query analysis (found by fuzzer). This closes [#33297](https://github.com/ClickHouse/ClickHouse/issues/33297). [#33445](https://github.com/ClickHouse/ClickHouse/pull/33445) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add config to enable ipv4 or ipv6. This close [#33381](https://github.com/ClickHouse/ClickHouse/issues/33381). [#33450](https://github.com/ClickHouse/ClickHouse/pull/33450) ([Wu Xueyang](https://github.com/wuxueyang96)). +* add function decodeURLFormComponent. Close [#10298](https://github.com/ClickHouse/ClickHouse/issues/10298). [#33451](https://github.com/ClickHouse/ClickHouse/pull/33451) ([SuperDJY](https://github.com/cmsxbc)). +* Implement Materialized view `getVirtuals` function. Close [#11210](https://github.com/ClickHouse/ClickHouse/issues/11210). [#33482](https://github.com/ClickHouse/ClickHouse/pull/33482) ([zhongyuankai](https://github.com/zhongyuankai)). + +#### Bug Fix +* Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). +* - Clickhouse Keeper handler should remove operation when response sent. [#32988](https://github.com/ClickHouse/ClickHouse/pull/32988) ([JackyWoo](https://github.com/JackyWoo)). +* Fix null pointer dereference in low cardinality data when deserializing LowCardinality data in the Native format. [#33021](https://github.com/ClickHouse/ClickHouse/pull/33021) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Specifically crafted input data for `Native` format may lead to reading uninitialized memory or crash. This is relevant if `clickhouse-server` is open for write access to adversary. [#33050](https://github.com/ClickHouse/ClickHouse/pull/33050) ([Heena Bansal](https://github.com/HeenaBansal2009)). + +#### Build/Testing/Packaging Improvement +* - Add arm64 packages - Stream python logs in realtime with `PYTHONUNBUFFERED=1` - Fix building docker images in docker/packager/packager script. [#32415](https://github.com/ClickHouse/ClickHouse/pull/32415) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Terminate build when linker path not found. [#32437](https://github.com/ClickHouse/ClickHouse/pull/32437) ([JackyWoo](https://github.com/JackyWoo)). +* - Create a global ENV per job - Clean CCACHE after a build is over. [#32478](https://github.com/ClickHouse/ClickHouse/pull/32478) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* PENDING (Should mention submodule updates and versions). [#32484](https://github.com/ClickHouse/ClickHouse/pull/32484) ([Raúl Marín](https://github.com/Algunenano)). +* Remove readline support. [#32574](https://github.com/ClickHouse/ClickHouse/pull/32574) ([Azat Khuzhin](https://github.com/azat)). +* Fix build issue related to azure blob storage. [#32788](https://github.com/ClickHouse/ClickHouse/pull/32788) ([Amos Bird](https://github.com/amosbird)). +* - Unify init scripts for every worker runner type - Install pigz in AMI. [#32800](https://github.com/ClickHouse/ClickHouse/pull/32800) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use all available in container processors for PVS studio check and fast tests. Delete coverage image. [#32854](https://github.com/ClickHouse/ClickHouse/pull/32854) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Rename main to pull_request - Add BuilderDebAarch64 to all workflows - Use docker buildkit, save building cache to docker hub and reuse it - Build x86_64 and arm64 docker images separately, then merge them together to a multi-architecture manifest - Tune many docker images to being multi-architecture - Use the images from the current PR/commit in the following dependent builds - Upgrade mysql client in stateless-tests image - Add functional tests for aarch64 for PR actions (forced green for a while) - Add python typing to some scripts - Add docker buildkit to runners' init script - Add func-tester-aarch64 runners - Use `docker login --password-stdin` to not expose password on exception. [#32911](https://github.com/ClickHouse/ClickHouse/pull/32911) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Enable hermetic build for shared builds. This is mainly for developers. [#32968](https://github.com/ClickHouse/ClickHouse/pull/32968) ([Amos Bird](https://github.com/amosbird)). +* Prepare ClickHouse to be built with musl-libc. It is not enabled by default. [#33134](https://github.com/ClickHouse/ClickHouse/pull/33134) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a simple tool to visualize flaky tests in web browser. [#33185](https://github.com/ClickHouse/ClickHouse/pull/33185) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid strict checking when `ENABLE_AZURE_BLOB_STORAGE = 0`. https://github.com/ClickHouse/ClickHouse/pull/32948#discussion_r773168611 cc @nikitamikhaylov. [#33219](https://github.com/ClickHouse/ClickHouse/pull/33219) ([Amos Bird](https://github.com/amosbird)). +* Add more tests for the nullable primary key feature. Add more tests with different types and merge tree kinds, plus randomly generated data. [#33228](https://github.com/ClickHouse/ClickHouse/pull/33228) ([Amos Bird](https://github.com/amosbird)). +* Don't use particular encoding for diff-strings, it may contain multiple different encodings. [#33336](https://github.com/ClickHouse/ClickHouse/pull/33336) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Avoid strict checking when ENABLE_AZURE_BLOB_STORAGE = 0. This is another try on behalf of https://github.com/ClickHouse/ClickHouse/pull/33219 , which was reverted likely due to CI issues. [#33346](https://github.com/ClickHouse/ClickHouse/pull/33346) ([Amos Bird](https://github.com/amosbird)). +* During migration from Yandex to github actions we've lost static links to the latest master ([doc](https://clickhouse.com/docs/en/getting-started/install/#from-single-binary)) It solves issue [#33480](https://github.com/ClickHouse/ClickHouse/issues/33480) partially. [#33559](https://github.com/ClickHouse/ClickHouse/pull/33559) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Restore a lost description checking. [#33591](https://github.com/ClickHouse/ClickHouse/pull/33591) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add `actionlint` for workflows and verify workflow files via `act --list` to check the correct workflow syntax. [#33612](https://github.com/ClickHouse/ClickHouse/pull/33612) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove editing /etc/hosts from Dockerfile. [#33635](https://github.com/ClickHouse/ClickHouse/pull/33635) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Properly separate thrift-cmake from arrow-cmake after https://github.com/ClickHouse/ClickHouse/pull/31104 . cc @taiyang-li. [#33661](https://github.com/ClickHouse/ClickHouse/pull/33661) ([Amos Bird](https://github.com/amosbird)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fixed CAST from String to IPv4 or IPv6 and back. Fixed error message in case of failed conversion. [#29224](https://github.com/ClickHouse/ClickHouse/pull/29224) ([Dmitry Novik](https://github.com/novikd)). +* Fix base64Encode adding trailing bytes on small strings. [#31797](https://github.com/ClickHouse/ClickHouse/pull/31797) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)). +* Fix 'APPLY lambda' parsing which could lead to client/server crash. [#32138](https://github.com/ClickHouse/ClickHouse/pull/32138) ([Kruglov Pavel](https://github.com/Avogar)). +* Some replication queue entries might hang for `temporary_directories_lifetime` (1 day by default) with `Directory tmp_merge_` or `Part ... (state Deleting) already exists, but it will be deleted soon` or similar error. It's fixed. Fixes [#29616](https://github.com/ClickHouse/ClickHouse/issues/29616). [#32201](https://github.com/ClickHouse/ClickHouse/pull/32201) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix unix timestamp Millisecond convert to DateTime64, fractional part calc reversed. [#32240](https://github.com/ClickHouse/ClickHouse/pull/32240) ([Ben](https://github.com/benbiti)). +* Fix broken select query when there are more than 2 row policies on same column, begin at second queries on the same session. [#31606](https://github.com/ClickHouse/ClickHouse/issues/31606). [#32291](https://github.com/ClickHouse/ClickHouse/pull/32291) ([SuperDJY](https://github.com/cmsxbc)). +* Fix failures in queries that are trying to use skipping indices, which are not materialized yet. Fixes [#32292](https://github.com/ClickHouse/ClickHouse/issues/32292) and [#30343](https://github.com/ClickHouse/ClickHouse/issues/30343). [#32359](https://github.com/ClickHouse/ClickHouse/pull/32359) ([Anton Popov](https://github.com/CurtizJ)). +* Fix issue with ambiguous query formatting in distributed queries that led to errors when some table columns were named ALL or DISTINCT. This closes [#32391](https://github.com/ClickHouse/ClickHouse/issues/32391). [#32490](https://github.com/ClickHouse/ClickHouse/pull/32490) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The Proper handle of the case with apache arrow column duplication. [#32507](https://github.com/ClickHouse/ClickHouse/pull/32507) ([Dmitriy Mokhnatkin](https://github.com/DMokhnatkin)). +* Fix crash in `JoinCommon::removeColumnNullability`, close [#32458](https://github.com/ClickHouse/ClickHouse/issues/32458). [#32508](https://github.com/ClickHouse/ClickHouse/pull/32508) ([Vladimir C](https://github.com/vdimir)). +* fix groupBitmapAnd function on distributed table. [#32529](https://github.com/ClickHouse/ClickHouse/pull/32529) ([minhthucdao](https://github.com/dmthuc)). +* Fix async inserts with formats CustomSeparated, Template, Regexp, MsgPack and JSONAsString. Previousely async inserts with these formats didn't read any data. [#32530](https://github.com/ClickHouse/ClickHouse/pull/32530) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix sparse_hashed dict performance with sequential keys (wrong hash function). [#32536](https://github.com/ClickHouse/ClickHouse/pull/32536) ([Azat Khuzhin](https://github.com/azat)). +* Fix table lifetime (i.e. possible use-after-free) in case of parallel DROP TABLE and INSERT. [#32572](https://github.com/ClickHouse/ClickHouse/pull/32572) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible exception at RabbitMQ storage startup by delaying channel creation. [#32584](https://github.com/ClickHouse/ClickHouse/pull/32584) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix surprisingly bad code in function `file`. [#32640](https://github.com/ClickHouse/ClickHouse/pull/32640) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* close [#32504](https://github.com/ClickHouse/ClickHouse/issues/32504). [#32649](https://github.com/ClickHouse/ClickHouse/pull/32649) ([Vladimir C](https://github.com/vdimir)). +* Fix LOGICAL_ERROR when the target of a materialized view is a JOIN or a SET table. [#32669](https://github.com/ClickHouse/ClickHouse/pull/32669) ([Raúl Marín](https://github.com/Algunenano)). +* Fix `optimize_read_in_order` optimization in case when table engine is `Distributed` or `Merge` and its underlying `MergeTree` tables have monotonous function in prefix of sorting key. [#32670](https://github.com/ClickHouse/ClickHouse/pull/32670) ([Anton Popov](https://github.com/CurtizJ)). +* Fix `ALTER TABLE ... MATERIALIZE TTL` query with `TTL ... DELETE WHERE ...` and `TTL ... GROUP BY ...` modes. [#32695](https://github.com/ClickHouse/ClickHouse/pull/32695) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error `Column is not under aggregate function` in case of MV with `GROUP BY (list of columns)` (which is pared as `GROUP BY tuple(...)`) over Kafka/RabbitMQ. Fixes [#32668](https://github.com/ClickHouse/ClickHouse/issues/32668) and [#32744](https://github.com/ClickHouse/ClickHouse/issues/32744). [#32751](https://github.com/ClickHouse/ClickHouse/pull/32751) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* fix crash when used fuzzBits with multiply same FixedString, Close [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)). +* Fix `--database` option for clickhouse-local. [#32797](https://github.com/ClickHouse/ClickHouse/pull/32797) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Avoid reusing the scalar subquery cache when processing MV blocks. This fixes a bug when the scalar query reference the source table but it means that all subscalar queries in the MV definition will be calculated for each block. [#32811](https://github.com/ClickHouse/ClickHouse/pull/32811) ([Raúl Marín](https://github.com/Algunenano)). +* `MergeTree` table engine might silently skip some mutations if there are too many running mutations or in case of high memory consumption, it's fixed. Fixes [#17882](https://github.com/ClickHouse/ClickHouse/issues/17882). [#32814](https://github.com/ClickHouse/ClickHouse/pull/32814) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix optimization with lazy seek for async reads from remote fs. Closes [#32803](https://github.com/ClickHouse/ClickHouse/issues/32803). [#32835](https://github.com/ClickHouse/ClickHouse/pull/32835) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed --echo option is not used by clickhouse-client in batch mode with single query. [#32843](https://github.com/ClickHouse/ClickHouse/pull/32843) ([N. Kolotov](https://github.com/nkolotov)). +* Fix MV query with multiple chunk result. Fixes [#31419](https://github.com/ClickHouse/ClickHouse/issues/31419). [#32862](https://github.com/ClickHouse/ClickHouse/pull/32862) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Close [#32487](https://github.com/ClickHouse/ClickHouse/issues/32487). [#32914](https://github.com/ClickHouse/ClickHouse/pull/32914) ([Vladimir C](https://github.com/vdimir)). +* Fix ORC stripe reading. [#32929](https://github.com/ClickHouse/ClickHouse/pull/32929) ([Ernest Zaslavsky](https://github.com/kreuzerkrieg)). +* Fix a regression in `replaceRegexpAll` function. The function worked incorrectly when matched substring was empty. This closes [#32777](https://github.com/ClickHouse/ClickHouse/issues/32777). This closes [#30245](https://github.com/ClickHouse/ClickHouse/issues/30245). [#32945](https://github.com/ClickHouse/ClickHouse/pull/32945) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix UB in case of unexpected EOF during filling a set from HTTP query (i.e. if the client interrupted in the middle, i.e. `timeout 0.15s curl -Ss -F 's=@t.csv;' 'http://127.0.0.1:8123/?s_structure=key+Int&query=SELECT+dummy+IN+s'` and with large enough `t.csv`). [#32955](https://github.com/ClickHouse/ClickHouse/pull/32955) ([Azat Khuzhin](https://github.com/azat)). +* Fix throwing exception like positional argument out of bounds for non-positional arguments. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173)#event-5789668239. [#32961](https://github.com/ClickHouse/ClickHouse/pull/32961) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix hdfs url check that didn't allow using HA namenode address. Bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix wrong tuple output in CSV format in case of custom csv delimiter. [#32981](https://github.com/ClickHouse/ClickHouse/pull/32981) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix Context leak in case of cancel_http_readonly_queries_on_client_close (i.e. leaking of external tables that had been uploaded the the server and other resources). [#32982](https://github.com/ClickHouse/ClickHouse/pull/32982) ([Azat Khuzhin](https://github.com/azat)). +* Remove obsolete code from ConfigProcessor. Yandex specific code is not used anymore. The code contained one minor defect. This defect was reported by [Mallik Hassan](https://github.com/SadiHassan) in [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). This closes [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). [#33026](https://github.com/ClickHouse/ClickHouse/pull/33026) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix s3 table function reading empty file. Closes [#33008](https://github.com/ClickHouse/ClickHouse/issues/33008). [#33037](https://github.com/ClickHouse/ClickHouse/pull/33037) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix error `Invalid version for SerializationLowCardinality key column` in case of reading from `LowCardinality` column with `local_filesystem_read_prefetch` or `remote_filesystem_read_prefetch` enabled. [#33046](https://github.com/ClickHouse/ClickHouse/pull/33046) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Don't allow to write into S3 if path contains globs. [#33142](https://github.com/ClickHouse/ClickHouse/pull/33142) ([Kruglov Pavel](https://github.com/Avogar)). +* fix incorrect metric: StorageBufferBytes. [#33159](https://github.com/ClickHouse/ClickHouse/pull/33159) ([xuyatian](https://github.com/xuyatian)). +* Fix MaterializedPostreSQL detach/attach (removing / adding to replication) tables with non-default schema. Found in [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33179](https://github.com/ClickHouse/ClickHouse/pull/33179) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Make sure unused replication slots are always removed. Found in [#26952](https://github.com/ClickHouse/ClickHouse/issues/26952),. [#33187](https://github.com/ClickHouse/ClickHouse/pull/33187) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix ddl validation. Fix setting `materialized_postgresql_allow_automatic_update`. Closes [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33200](https://github.com/ClickHouse/ClickHouse/pull/33200) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix WHERE 1=0 for external databases query. Closes [#33152](https://github.com/ClickHouse/ClickHouse/issues/33152). [#33214](https://github.com/ClickHouse/ClickHouse/pull/33214) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add asynchronous inserts (with enabled setting `async_insert`) to query log. Previously such queries didn't appear in query log. [#33239](https://github.com/ClickHouse/ClickHouse/pull/33239) ([Anton Popov](https://github.com/CurtizJ)). +* Fix ACLMap num, because acl_to_num will erase. [#33246](https://github.com/ClickHouse/ClickHouse/pull/33246) ([小路](https://github.com/nicelulu)). +* Fix ACL with explicit digit hash in clickhouse-keeper: now the behavior consistent with zookeeper and generated digest is always accepted. [#33249](https://github.com/ClickHouse/ClickHouse/pull/33249) ([小路](https://github.com/nicelulu)). +* Fix when `COMMENT` for dictionaries does not appear in `system.tables`, `system.dictionaries`. Allow to modify comment for `Dictionary` engine. Closes [#33251](https://github.com/ClickHouse/ClickHouse/issues/33251). [#33261](https://github.com/ClickHouse/ClickHouse/pull/33261) ([Maksim Kita](https://github.com/kitaisreal)). +* The commands `SYSTEM SUSPEND` and `SYSTEM ... THREAD FUZZER` missed access control. It is fixed. Author: Kevin Michel. [#33333](https://github.com/ClickHouse/ClickHouse/pull/33333) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not try to read pass EOF (to workaround a bug in a kernel), this bug can be reproduced on kernels (3.14..5.9), and requires `index_granularity_bytes=0` (i.e. turn off adaptive index granularity). [#33372](https://github.com/ClickHouse/ClickHouse/pull/33372) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible use-after-free for INSERT into MV with concurrent DROP ([#32572](https://github.com/ClickHouse/ClickHouse/issues/32572) significantly reduce the race window, this one should completely eliminate it). [#33386](https://github.com/ClickHouse/ClickHouse/pull/33386) ([Azat Khuzhin](https://github.com/azat)). +* Fix query cancellation in case of allow_experimental_parallel_reading_from_replicas. [#33456](https://github.com/ClickHouse/ClickHouse/pull/33456) ([Azat Khuzhin](https://github.com/azat)). +* Fix DROP MaterializedPostgreSQL database. [#33468](https://github.com/ClickHouse/ClickHouse/pull/33468) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix dictionary expressions for RangeHashedDictionary range min and range max attributes when created using DDL. Closes [#30809](https://github.com/ClickHouse/ClickHouse/issues/30809). [#33478](https://github.com/ClickHouse/ClickHouse/pull/33478) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix an exception `Block structure mismatch` which may happen during insertion into table with default nested `LowCardinality` column. Fixes [#33028](https://github.com/ClickHouse/ClickHouse/issues/33028). [#33504](https://github.com/ClickHouse/ClickHouse/pull/33504) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Close issue: [#33289](https://github.com/ClickHouse/ClickHouse/issues/33289) Fix bug when query view with setting offset and limit. [#33518](https://github.com/ClickHouse/ClickHouse/pull/33518) ([hexiaoting](https://github.com/hexiaoting)). +* Fix parsing incorrect queries with FROM INFILE statement. [#33521](https://github.com/ClickHouse/ClickHouse/pull/33521) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix segfault in arrowSchemaToCHHeader if schema contains Dictionary type. Closes [#33507](https://github.com/ClickHouse/ClickHouse/issues/33507). [#33529](https://github.com/ClickHouse/ClickHouse/pull/33529) ([Kruglov Pavel](https://github.com/Avogar)). +* session_id_counter poniter to next slot. [#33555](https://github.com/ClickHouse/ClickHouse/pull/33555) ([小路](https://github.com/nicelulu)). +* Fix segfault in Avro that appears after the second insert into file. [#33566](https://github.com/ClickHouse/ClickHouse/pull/33566) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix wrong database for JOIN w/o explicit database in distributed queries (Fixes: [#10471](https://github.com/ClickHouse/ClickHouse/issues/10471)). [#33611](https://github.com/ClickHouse/ClickHouse/pull/33611) ([Azat Khuzhin](https://github.com/azat)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Update CHANGELOG.md'. [#32472](https://github.com/ClickHouse/ClickHouse/pull/32472) ([Rich Raposa](https://github.com/rfraposa)). +* NO CL ENTRY: 'Revert "Split long tests into multiple checks"'. [#32514](https://github.com/ClickHouse/ClickHouse/pull/32514) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Revert "Split long tests into multiple checks""'. [#32515](https://github.com/ClickHouse/ClickHouse/pull/32515) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'blog post how to enable predictive capabilities in Clickhouse'. [#32768](https://github.com/ClickHouse/ClickHouse/pull/32768) ([Tom Risse](https://github.com/flickerbox-tom)). +* NO CL ENTRY: 'Revert "Fix build issue related to azure blob storage"'. [#32845](https://github.com/ClickHouse/ClickHouse/pull/32845) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Dictionaries added Date32 type support"'. [#33053](https://github.com/ClickHouse/ClickHouse/pull/33053) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Updated Lawrence Berkeley National Lab stats'. [#33066](https://github.com/ClickHouse/ClickHouse/pull/33066) ([Michael Smitasin](https://github.com/michaelsmitasin)). +* NO CL ENTRY: 'fix AggregateFunctionGroupBitmapData function rb_contains rb_remove'. [#33127](https://github.com/ClickHouse/ClickHouse/pull/33127) ([DR](https://github.com/freedomDR)). +* NO CL ENTRY: 'Fix for example request with settings'. [#33143](https://github.com/ClickHouse/ClickHouse/pull/33143) ([Vitaly Artemyev](https://github.com/VitalyArt)). +* NO CL ENTRY: 'Revert "Grouping sets dev"'. [#33186](https://github.com/ClickHouse/ClickHouse/pull/33186) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Optimize MergeTreePartsMover'. [#33225](https://github.com/ClickHouse/ClickHouse/pull/33225) ([zhongyuankai](https://github.com/zhongyuankai)). +* NO CL ENTRY: 'rm redundant judge in hashmap iter operation'. [#33285](https://github.com/ClickHouse/ClickHouse/pull/33285) ([zbtzbtzbt](https://github.com/zbtzbtzbt)). +* NO CL ENTRY: 'fix hang up with command 'drop table system.query_log sync''. [#33293](https://github.com/ClickHouse/ClickHouse/pull/33293) ([zhanghuajie](https://github.com/zhanghuajieHIT)). +* NO CL ENTRY: 'Improve query performance of system tables'. [#33312](https://github.com/ClickHouse/ClickHouse/pull/33312) ([zhongyuankai](https://github.com/zhongyuankai)). +* NO CL ENTRY: 'Revert "Better cmake script for azure blob"'. [#33319](https://github.com/ClickHouse/ClickHouse/pull/33319) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* NO CL ENTRY: 'Added Superwall to adopters list'. [#33573](https://github.com/ClickHouse/ClickHouse/pull/33573) ([Justin Hilliard](https://github.com/jahilliard)). +* NO CL ENTRY: 'Revert "Ignore parse failure of opentelemetry header"'. [#33594](https://github.com/ClickHouse/ClickHouse/pull/33594) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release): + +* Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NO CL CATEGORY + +* Fix Regular Expression while key path search. [#33023](https://github.com/ClickHouse/ClickHouse/pull/33023) ([mreddy017](https://github.com/mreddy017)). +* - Allow to split GraphiteMergeTree rollup rules for plain/tagged metrics (optional rule_type field). [#33494](https://github.com/ClickHouse/ClickHouse/pull/33494) ([Michail Safronov](https://github.com/msaf1980)). + +#### Bug Fix (v21.9.4.35-stable) + +* Fix [#32964](https://github.com/ClickHouse/ClickHouse/issues/32964). [#32965](https://github.com/ClickHouse/ClickHouse/pull/32965) ([save-my-heart](https://github.com/save-my-heart)). + +#### New Feature / New Tool + +* Tool for collecting diagnostics data. [#33175](https://github.com/ClickHouse/ClickHouse/pull/33175) ([Alexander Burmak](https://github.com/Alex-Burmak)). + diff --git a/docs/changelogs/v22.1.2.2-stable.md b/docs/changelogs/v22.1.2.2-stable.md new file mode 100644 index 00000000000..450c640bc5e --- /dev/null +++ b/docs/changelogs/v22.1.2.2-stable.md @@ -0,0 +1,2 @@ +### ClickHouse release v22.1.2.2-stable FIXME as compared to v22.1.1.2542-prestable + diff --git a/docs/changelogs/v22.1.3.7-stable.md b/docs/changelogs/v22.1.3.7-stable.md new file mode 100644 index 00000000000..ffb0ec6048d --- /dev/null +++ b/docs/changelogs/v22.1.3.7-stable.md @@ -0,0 +1,2 @@ +### ClickHouse release v22.1.3.7-stable FIXME as compared to v22.1.2.2-stable + diff --git a/docs/changelogs/v22.1.4.30-stable.md b/docs/changelogs/v22.1.4.30-stable.md new file mode 100644 index 00000000000..1ea56131481 --- /dev/null +++ b/docs/changelogs/v22.1.4.30-stable.md @@ -0,0 +1,19 @@ +### ClickHouse release v22.1.4.30-stable FIXME as compared to v22.1.3.7-stable + +#### Build/Testing/Packaging Improvement +* Backport CI checks to 22.1 release branch. [#34897](https://github.com/ClickHouse/ClickHouse/pull/34897) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#34119](https://github.com/ClickHouse/ClickHouse/issues/34119): Fix usage of functions `array` and `tuple` with literal arguments in distributed queries. Previously it could lead to `Not found columns` exception. [#33938](https://github.com/ClickHouse/ClickHouse/pull/33938) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#34124](https://github.com/ClickHouse/ClickHouse/issues/34124): Fix crash while reading of nested tuples. Fixes [#33838](https://github.com/ClickHouse/ClickHouse/issues/33838). [#33956](https://github.com/ClickHouse/ClickHouse/pull/33956) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#34098](https://github.com/ClickHouse/ClickHouse/issues/34098): Fix segfault while parsing ORC file with corrupted footer. Closes [#33797](https://github.com/ClickHouse/ClickHouse/issues/33797). [#33984](https://github.com/ClickHouse/ClickHouse/pull/33984) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#34038](https://github.com/ClickHouse/ClickHouse/issues/34038): Fix bug which lead to inability for server to start when both replicated access storage and keeper are used. Introduced two settings for keeper socket timeout instead of settings from default user: `keeper_server.socket_receive_timeout_sec` and `keeper_server.socket_send_timeout_sec`. Fixes [#33973](https://github.com/ClickHouse/ClickHouse/issues/33973). [#33988](https://github.com/ClickHouse/ClickHouse/pull/33988) ([alesapin](https://github.com/alesapin)). +* Backported in [#34184](https://github.com/ClickHouse/ClickHouse/issues/34184): Fixed minor race condition that might cause "intersecting parts" error in extremely rare cases after ZooKeeper connection loss. [#34096](https://github.com/ClickHouse/ClickHouse/pull/34096) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#34467](https://github.com/ClickHouse/ClickHouse/issues/34467): Fix inserts to distributed tables in case of change of native protocol. The last change was in the version version 22.1, so there may be some failures of inserts to distributed tables after upgrade to that version. [#34132](https://github.com/ClickHouse/ClickHouse/pull/34132) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#34209](https://github.com/ClickHouse/ClickHouse/issues/34209): Fix bug which can rarely lead to error "Cannot read all data" while reading LowCardinality columns of MergeTree table engines family which stores data on remote file system like S3. [#34139](https://github.com/ClickHouse/ClickHouse/pull/34139) ([alesapin](https://github.com/alesapin)). +* Backported in [#34266](https://github.com/ClickHouse/ClickHouse/issues/34266): Fix metric `Query`, which shows number of executing queries. In last several releases it was always 0. [#34224](https://github.com/ClickHouse/ClickHouse/pull/34224) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#34298](https://github.com/ClickHouse/ClickHouse/issues/34298): Fix progress bar width. It was incorrectly rounded to integer number of characters. [#34275](https://github.com/ClickHouse/ClickHouse/pull/34275) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#34391](https://github.com/ClickHouse/ClickHouse/issues/34391): Try to fix rare bug while reading of empty arrays, which could lead to `Data compressed with different methods` error. [#34327](https://github.com/ClickHouse/ClickHouse/pull/34327) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#34430](https://github.com/ClickHouse/ClickHouse/issues/34430): Fix segfault in schema inference from url. Closes [#34147](https://github.com/ClickHouse/ClickHouse/issues/34147). [#34405](https://github.com/ClickHouse/ClickHouse/pull/34405) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v22.2.1.2139-prestable.md b/docs/changelogs/v22.2.1.2139-prestable.md new file mode 100644 index 00000000000..187a2a26a66 --- /dev/null +++ b/docs/changelogs/v22.2.1.2139-prestable.md @@ -0,0 +1,208 @@ +### ClickHouse release v22.2.1.2139-prestable FIXME as compared to v22.1.1.2542-prestable + +#### New Feature +* (Not ready for production, put into experimental features) Add memory overcommit to `MemoryTracker`. Added `guaranteed` settings for memory limits which represent soft memory limits. In case when hard memory limit is reached, `MemoryTracker` tries to cancel the most overcommited query. New setting `memory_usage_overcommit_max_wait_microseconds` specifies how long queries may wait another query to stop. Closes [#28375](https://github.com/ClickHouse/ClickHouse/issues/28375). [#31182](https://github.com/ClickHouse/ClickHouse/pull/31182) ([Dmitry Novik](https://github.com/novikd)). +* The setting allows a user to provide own deduplication semantic in MergeTree/ReplicatedMergeTree If provided, it's used instead of data digest to generate block ID. So, for example, by providing a unique value for the setting in each INSERT statement, the user can avoid the same inserted data being deduplicated. This closes: [#7461](https://github.com/ClickHouse/ClickHouse/issues/7461). [#32304](https://github.com/ClickHouse/ClickHouse/pull/32304) ([Igor Nikonov](https://github.com/devcrafter)). +* Add support of DEFAULT keyword for INSERT statements. Closes [#6331](https://github.com/ClickHouse/ClickHouse/issues/6331). [#33141](https://github.com/ClickHouse/ClickHouse/pull/33141) ([Andrii Buriachevskyi](https://github.com/0over)). +* Add confidence intervals to ttests. [#33260](https://github.com/ClickHouse/ClickHouse/pull/33260) ([achimbab](https://github.com/achimbab)). +* Allow to create new files on insert for File/S3/HDFS engines. Allow to owerwrite file in HDFS. Throw an exception in attempt to overwrite a file in S3 by default. Throw an exception in attempt to append data to file in formats that have suffix. Closes [#31640](https://github.com/ClickHouse/ClickHouse/issues/31640) Closes [#31622](https://github.com/ClickHouse/ClickHouse/issues/31622) Closes [#23862](https://github.com/ClickHouse/ClickHouse/issues/23862) Closes [#15022](https://github.com/ClickHouse/ClickHouse/issues/15022) Closes [#16674](https://github.com/ClickHouse/ClickHouse/issues/16674). [#33302](https://github.com/ClickHouse/ClickHouse/pull/33302) ([Kruglov Pavel](https://github.com/Avogar)). +* Add `h3ToCenterChild` function. [#33313](https://github.com/ClickHouse/ClickHouse/pull/33313) ([Bharat Nallan](https://github.com/bharatnc)). +* Merge functions for text classification. See [#23271](https://github.com/ClickHouse/ClickHouse/issues/23271). [#33314](https://github.com/ClickHouse/ClickHouse/pull/33314) ([Nikolay Degterinsky](https://github.com/evillique)). +* Implemented meanZTest. [#33354](https://github.com/ClickHouse/ClickHouse/pull/33354) ([achimbab](https://github.com/achimbab)). +* - Add function bitSlice. [#33360](https://github.com/ClickHouse/ClickHouse/pull/33360) ([RogerYK](https://github.com/RogerYK)). +* Add new h3 miscellaneous functions: `edgeLengthKm`,`exactEdgeLengthKm`,`exactEdgeLengthM`,`exactEdgeLengthRads`,`numHexagons`. [#33621](https://github.com/ClickHouse/ClickHouse/pull/33621) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `DEGREES` and `RADIANS` functions. [#33769](https://github.com/ClickHouse/ClickHouse/pull/33769) ([Bharat Nallan](https://github.com/bharatnc)). +* Parameter `--host` can accept multiple hosts. In case of unavailability of one of them, the client will try to connect to the next one. [#33824](https://github.com/ClickHouse/ClickHouse/pull/33824) ([Filippov Denis](https://github.com/DF5HSE)). +* Detect format in clickhouse-local by file name. [#33829](https://github.com/ClickHouse/ClickHouse/pull/33829) ([Kruglov Pavel](https://github.com/Avogar)). +* Add a new method `expire()` in PoolBase which is used to reallocate an invalid object in the pool. [#34076](https://github.com/ClickHouse/ClickHouse/pull/34076) ([lgbo](https://github.com/lgbo-ustc)). +* Add table function `format(format_name, data)`. [#34125](https://github.com/ClickHouse/ClickHouse/pull/34125) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow to create default table engine. [#34187](https://github.com/ClickHouse/ClickHouse/pull/34187) ([Ilya Yatsishin](https://github.com/qoega)). +* `EPHEMERAL` column specifier is added to `CREATE TABLE` query. Closes [#9436](https://github.com/ClickHouse/ClickHouse/issues/9436). [#34424](https://github.com/ClickHouse/ClickHouse/pull/34424) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### Performance Improvement +* For inserts and merges into S3, write files in parallel whenever possible. [#33291](https://github.com/ClickHouse/ClickHouse/pull/33291) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Improve RangeHashedDictionary performance if for key there are a lot of intervals. Fixes [#23821](https://github.com/ClickHouse/ClickHouse/issues/23821). [#33516](https://github.com/ClickHouse/ClickHouse/pull/33516) ([Maksim Kita](https://github.com/kitaisreal)). +* Add x86 avx512 support for memcmpSmall functions to accelerate memory comparison. It works only if you compile ClickHouse by yourself. [#33706](https://github.com/ClickHouse/ClickHouse/pull/33706) ([hanqf-git](https://github.com/hanqf-git)). +* Reworks and reintroduces the scalar cache to MV execution. [#33958](https://github.com/ClickHouse/ClickHouse/pull/33958) ([Raúl Marín](https://github.com/Algunenano)). +* Make ORDER BY tuple almost as fast as ORDER BY columns. We have special optimizations for multiple column ORDER BY: https://github.com/ClickHouse/ClickHouse/pull/10831 . It's beneficial to also apply to tuple columns. [#34060](https://github.com/ClickHouse/ClickHouse/pull/34060) ([Amos Bird](https://github.com/amosbird)). +* Minor improvement to potential hot-path in `ExecuteScalarSubqueriesMatcher::visit` , where `std::set` was constructed on every function invocation. [#34128](https://github.com/ClickHouse/ClickHouse/pull/34128) ([Federico Rodriguez](https://github.com/fedrod)). +* Slightly improve performance of `Regexp` format. [#34202](https://github.com/ClickHouse/ClickHouse/pull/34202) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Optimize quantilesExact{Low,High} to use nth_element instead of sort. [#34287](https://github.com/ClickHouse/ClickHouse/pull/34287) ([Daniel Kutenin](https://github.com/danlark1)). +* Improve performance of `LineAsString` format. This closes [#34303](https://github.com/ClickHouse/ClickHouse/issues/34303). [#34306](https://github.com/ClickHouse/ClickHouse/pull/34306) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up loading of data parts. It was not parallelized before: the setting `part_loading_threads` did not have effect. See [#4699](https://github.com/ClickHouse/ClickHouse/issues/4699). [#34310](https://github.com/ClickHouse/ClickHouse/pull/34310) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `_file` and `_path` virtual columns (in file-like table engines) are made `LowCardinality` - it will make queries for multiple files faster. Closes [#34300](https://github.com/ClickHouse/ClickHouse/issues/34300). [#34317](https://github.com/ClickHouse/ClickHouse/pull/34317) ([flynn](https://github.com/ucasfl)). +* Improve performance of `mapPopulateSeries` function. Closes [#33944](https://github.com/ClickHouse/ClickHouse/issues/33944). [#34318](https://github.com/ClickHouse/ClickHouse/pull/34318) ([Maksim Kita](https://github.com/kitaisreal)). +* Use a vector to collect useless list iterators when doing a snapshot, and in latter clearOutdatedNodes, we can just traverse the vector, not the list, which is faster. [#34484](https://github.com/ClickHouse/ClickHouse/pull/34484) ([zhanglistar](https://github.com/zhanglistar)). +* Improve performance of insert into table functions URL, S3, File, HDFS. Closes [#34348](https://github.com/ClickHouse/ClickHouse/issues/34348). [#34510](https://github.com/ClickHouse/ClickHouse/pull/34510) ([Maksim Kita](https://github.com/kitaisreal)). +* According https://github.com/eBay/NuRaft/issues/209, there is no need to lock the mothod. [#34523](https://github.com/ClickHouse/ClickHouse/pull/34523) ([zhanglistar](https://github.com/zhanglistar)). + +#### Improvement +* Now ReplicatedMergeTree can recover data when some of its disks are broken. [#13544](https://github.com/ClickHouse/ClickHouse/pull/13544) ([Amos Bird](https://github.com/amosbird)). +* Merge [#15765](https://github.com/ClickHouse/ClickHouse/issues/15765) (Dynamic reload of server TLS certificates on config reload) cc @johnskopis. [#31257](https://github.com/ClickHouse/ClickHouse/pull/31257) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Added `UUID` data type support for functions `hex`, `bin`. [#32170](https://github.com/ClickHouse/ClickHouse/pull/32170) ([Frank Chen](https://github.com/FrankChen021)). +* Support `optimize_read_in_order` if prefix of sorting key is already sorted. E.g. if we have sorting key `ORDER BY (a, b)` in table and query with `WHERE a = const ORDER BY b` clauses, now it will be applied reading in order of sorting key instead of full sort. [#32748](https://github.com/ClickHouse/ClickHouse/pull/32748) ([Anton Popov](https://github.com/CurtizJ)). +* Add new keeper setting `min_session_timeout_ms`. Now keeper server will determine client session timeout according to `min_session_timeout_ms` and `session_timeout_ms` settings. [#33288](https://github.com/ClickHouse/ClickHouse/pull/33288) ([JackyWoo](https://github.com/JackyWoo)). +* Improve keeper performance and fix several memory leaks. [#33329](https://github.com/ClickHouse/ClickHouse/pull/33329) ([alesapin](https://github.com/alesapin)). +* Respect cgroup limits for CPU quota. [#33342](https://github.com/ClickHouse/ClickHouse/pull/33342) ([JaySon](https://github.com/JaySon-Huang)). +* Enable binary arithmetic(plus, minus, multiply, division, least, greates) between Decimal and Float. [#33355](https://github.com/ClickHouse/ClickHouse/pull/33355) ([flynn](https://github.com/ucasfl)). +* Replace `_shard_num` via constants (from [#7624](https://github.com/ClickHouse/ClickHouse/issues/7624)) with `shardNum()` function (from [#27020](https://github.com/ClickHouse/ClickHouse/issues/27020)), to avoid possible issues (like those that had been found in [#16947](https://github.com/ClickHouse/ClickHouse/issues/16947)). [#33392](https://github.com/ClickHouse/ClickHouse/pull/33392) ([Azat Khuzhin](https://github.com/azat)). +* Support `SET`, `YEAR`, `TIME` and `GEOMETRY` data types in `MaterializedMySQL`. Fixes [#18091](https://github.com/ClickHouse/ClickHouse/issues/18091), [#21536](https://github.com/ClickHouse/ClickHouse/issues/21536), [#26361](https://github.com/ClickHouse/ClickHouse/issues/26361). [#33429](https://github.com/ClickHouse/ClickHouse/pull/33429) ([zzsmdfj](https://github.com/zzsmdfj)). +* add function addressToLineWithInlines. Close [#26211](https://github.com/ClickHouse/ClickHouse/issues/26211). [#33467](https://github.com/ClickHouse/ClickHouse/pull/33467) ([SuperDJY](https://github.com/cmsxbc)). +* Improvement for `fromUnixTimestamp64` family functions.. They now accept any integer value that can be converted to `Int64`. This closes: [#14648](https://github.com/ClickHouse/ClickHouse/issues/14648). [#33505](https://github.com/ClickHouse/ClickHouse/pull/33505) ([Andrey Zvonov](https://github.com/zvonand)). +* Functions `dictGet`, `dictHas` implicitly cast key argument to dictionary key structure, if they are different. [#33672](https://github.com/ClickHouse/ClickHouse/pull/33672) ([Maksim Kita](https://github.com/kitaisreal)). +* - Parse and store OpenTelemetry trace-id in big-endian order. [#33723](https://github.com/ClickHouse/ClickHouse/pull/33723) ([Frank Chen](https://github.com/FrankChen021)). +* Enable stream to table join in WindowView. [#33729](https://github.com/ClickHouse/ClickHouse/pull/33729) ([vxider](https://github.com/Vxider)). +* Create parent directories in DiskS3::restoreFileOperations method. [#33730](https://github.com/ClickHouse/ClickHouse/pull/33730) ([ianton-ru](https://github.com/ianton-ru)). +* Add some improvements and fixes for Bool data type. Fixes [#33244](https://github.com/ClickHouse/ClickHouse/issues/33244). [#33737](https://github.com/ClickHouse/ClickHouse/pull/33737) ([Kruglov Pavel](https://github.com/Avogar)). +* Added support for cast from `Map(Key, Value)` to `Array(Tuple(Key, Value))`. [#33794](https://github.com/ClickHouse/ClickHouse/pull/33794) ([Maksim Kita](https://github.com/kitaisreal)). +* Support explain create function query ``` sql :) explain ast create function mycast AS (n) -> cast(n as String); EXPLAIN AST CREATE FUNCTION mycast AS n -> CAST(n, 'String'). [#33819](https://github.com/ClickHouse/ClickHouse/pull/33819) ([李扬](https://github.com/taiyang-li)). +* Try every resolved ip address while getting S3 proxy. [#33862](https://github.com/ClickHouse/ClickHouse/pull/33862) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* `FlatDictionary` improve performance of dictionary data load. [#33871](https://github.com/ClickHouse/ClickHouse/pull/33871) ([Maksim Kita](https://github.com/kitaisreal)). +* fix disk using the same path, close [#29072](https://github.com/ClickHouse/ClickHouse/issues/29072). [#33905](https://github.com/ClickHouse/ClickHouse/pull/33905) ([zhongyuankai](https://github.com/zhongyuankai)). +* Dictionaries added support for DateTime64. [#33914](https://github.com/ClickHouse/ClickHouse/pull/33914) ([Maksim Kita](https://github.com/kitaisreal)). +* `FlatDictionary`, `HashedDictionary`, `HashedArrayDictionary` added support for creating with empty attributes, with support of read all keys, and `dictHas`. Fixes [#33820](https://github.com/ClickHouse/ClickHouse/issues/33820). [#33918](https://github.com/ClickHouse/ClickHouse/pull/33918) ([Maksim Kita](https://github.com/kitaisreal)). +* `RangeHashedDictionary` improvements. Improve performance of load time if there are multiple attributes. Allow to create without attributes. Added option to specify strategy when intervals `start` and `end` have `Nullable` type `convert_null_range_bound_to_open` by default is `true`. Closes [#29791](https://github.com/ClickHouse/ClickHouse/issues/29791). Allow to specify `Float`, `Decimal`, `DateTime64`, `Int128`, `Int256`, `UInt128`, `UInt256` as range types. `RangeHashedDictionary` added support for range values that extend `Int64` type. Closes [#28322](https://github.com/ClickHouse/ClickHouse/issues/28322). Added option `range_lookup_strategy` to specify range lookup type `min`, `max` by default is `min` . Closes [#21647](https://github.com/ClickHouse/ClickHouse/issues/21647). Fixed allocated bytes calculations. Fixed type name in `system.dictionaries` in case of `ComplexKeyHashedDictionary`. [#33927](https://github.com/ClickHouse/ClickHouse/pull/33927) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix getauxval() in glibc-compatibility, this should fix vsyscalls after `setenv` (i.e. timezone is set in config), and LSan (and also fix some leaks that had been found by LSan). [#33957](https://github.com/ClickHouse/ClickHouse/pull/33957) ([Azat Khuzhin](https://github.com/azat)). +* Detect format and schema from stdin in clickhouse-local. [#33960](https://github.com/ClickHouse/ClickHouse/pull/33960) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed UTF-8 string case-insensitive search when lowercase and uppercase characters are represented by different number of bytes. Example is `ẞ` and `ß`. This closes [#7334](https://github.com/ClickHouse/ClickHouse/issues/7334). [#33992](https://github.com/ClickHouse/ClickHouse/pull/33992) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Fix memory accounting for queries that uses < max_untracker_memory. [#34001](https://github.com/ClickHouse/ClickHouse/pull/34001) ([Azat Khuzhin](https://github.com/azat)). +* Supports all types of SYSTEM query ON CLUSTER clause. [#34005](https://github.com/ClickHouse/ClickHouse/pull/34005) ([小路](https://github.com/nicelulu)). +* Add schema inference for values() table function. Closes [#33811](https://github.com/ClickHouse/ClickHouse/issues/33811). [#34017](https://github.com/ClickHouse/ClickHouse/pull/34017) ([Kruglov Pavel](https://github.com/Avogar)). +* Tracing context is now propagated from GRPC client metadata. [#34064](https://github.com/ClickHouse/ClickHouse/pull/34064) ([andremarianiello](https://github.com/andremarianiello)). +* Add UUID suport in MsgPack input/output format. [#34065](https://github.com/ClickHouse/ClickHouse/pull/34065) ([Kruglov Pavel](https://github.com/Avogar)). +* Improving the experience of multiple line editing for clickhouse-client. This is a follow-up of https://github.com/ClickHouse/ClickHouse/pull/31123. [#34114](https://github.com/ClickHouse/ClickHouse/pull/34114) ([Amos Bird](https://github.com/amosbird)). +* Maxsplit argument for splitByChar. close [#34081](https://github.com/ClickHouse/ClickHouse/issues/34081). [#34140](https://github.com/ClickHouse/ClickHouse/pull/34140) ([李扬](https://github.com/taiyang-li)). +* Allow to parse dictionary `PRIMARY KEY` as `PRIMARY KEY (id, value)`, previously supported only `PRIMARY KEY id, value`. Closes [#34135](https://github.com/ClickHouse/ClickHouse/issues/34135). [#34141](https://github.com/ClickHouse/ClickHouse/pull/34141) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow carriage return in the middle of the line while parsing by `Regexp` format. This closes [#34200](https://github.com/ClickHouse/ClickHouse/issues/34200). [#34205](https://github.com/ClickHouse/ClickHouse/pull/34205) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Recognize `YYYYMMDD-hhmmss` format in `parseDateTimeBestEffort` function. This closes [#34206](https://github.com/ClickHouse/ClickHouse/issues/34206). [#34208](https://github.com/ClickHouse/ClickHouse/pull/34208) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ability to compose PostgreSQL-style cast operator `::` with `ArrayElement` and `TupleElement`. [#34229](https://github.com/ClickHouse/ClickHouse/pull/34229) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added `#!` and `# ` as a recognised start of a single line comment. Reference to task [#34138](https://github.com/ClickHouse/ClickHouse/issues/34138). [#34230](https://github.com/ClickHouse/ClickHouse/pull/34230) ([Aaron Katz](https://github.com/aaronstephenkatz)). +* Change severity of the "Cancelled merging parts" message in logs, because it's not an error. This closes [#34148](https://github.com/ClickHouse/ClickHouse/issues/34148). [#34232](https://github.com/ClickHouse/ClickHouse/pull/34232) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Apply data skipping indexes for queries with FINAL may produce incorrect result. Disable data skipping indexes by default for queries with FINAL (introduce new `use_skip_indexes_if_final` setting and disable it by default). [#34243](https://github.com/ClickHouse/ClickHouse/pull/34243) ([Azat Khuzhin](https://github.com/azat)). +* Support asynchronous inserts in `clickhouse-client` for queries with inlined data. [#34267](https://github.com/ClickHouse/ClickHouse/pull/34267) ([Anton Popov](https://github.com/CurtizJ)). +* Cancel merges before acquiring table lock for `TRUNCATE` query to avoid `DEADLOCK_AVOIDED` error in some cases. Fixes [#34302](https://github.com/ClickHouse/ClickHouse/issues/34302). [#34304](https://github.com/ClickHouse/ClickHouse/pull/34304) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Some servers expect a User-Agent header in their HTTP requests. A `User-Agent` header entry has been added to HTTP requests of the form: User-Agent: ClickHouse/VERSION_STRING. [#34330](https://github.com/ClickHouse/ClickHouse/pull/34330) ([Saad Ur Rahman](https://github.com/surahman)). +* REGEXP_MATCHES and REGEXP_REPLACE function aliases for compatibility with PostgreSQL. Close [#30885](https://github.com/ClickHouse/ClickHouse/issues/30885). [#34334](https://github.com/ClickHouse/ClickHouse/pull/34334) ([李扬](https://github.com/taiyang-li)). +* Better handle pre-inputs before client start. This is for [#34308](https://github.com/ClickHouse/ClickHouse/issues/34308) . [#34336](https://github.com/ClickHouse/ClickHouse/pull/34336) ([Amos Bird](https://github.com/amosbird)). +* Add options for clickhouse-format. Which close [#30528](https://github.com/ClickHouse/ClickHouse/issues/30528) - max_query_size - max_parser_depth. [#34349](https://github.com/ClickHouse/ClickHouse/pull/34349) ([李扬](https://github.com/taiyang-li)). +* Default input and output formats that can be overriden by --input-format and --output-format. Close [#30631](https://github.com/ClickHouse/ClickHouse/issues/30631). [#34352](https://github.com/ClickHouse/ClickHouse/pull/34352) ([李扬](https://github.com/taiyang-li)). +* Allow to skip not found urls for globs when using URL storage / table function. Also closes [#34359](https://github.com/ClickHouse/ClickHouse/issues/34359). [#34392](https://github.com/ClickHouse/ClickHouse/pull/34392) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add two new settings: `s3_upload_part_size_multiply_factor` and `s3_upload_part_size_multiply_parts_count_threshold`. Now each time `s3_upload_part_size_multiply_parts_count_threshold` uploaded to S3 from a single query `s3_min_upload_part_size` multiplied by `s3_upload_part_size_multiply_factor`. Fixes [#34244](https://github.com/ClickHouse/ClickHouse/issues/34244). [#34422](https://github.com/ClickHouse/ClickHouse/pull/34422) ([alesapin](https://github.com/alesapin)). +* Allow `allow_experimental_projection_optimization` by default. [#34456](https://github.com/ClickHouse/ClickHouse/pull/34456) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Privileges CREATE/ALTER/DROP ROW POLICY now can be granted on a table or on `database.*` as well as globally `*.*`. [#34489](https://github.com/ClickHouse/ClickHouse/pull/34489) ([Vitaly Baranov](https://github.com/vitlibar)). +* Refactor client fault tolerant connection (https://github.com/ClickHouse/ClickHouse/pull/33824#issuecomment-1033690860). The new way to use it: ```bash clickhouse-client ... --host host1 --host host2 --port port2 --host host3 --port port --host host4 ```. [#34490](https://github.com/ClickHouse/ClickHouse/pull/34490) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve schema inference in clickhouse-local. Allow to write just `clickhouse-local -q "select * from table" < data.format`. [#34495](https://github.com/ClickHouse/ClickHouse/pull/34495) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `.jsonl` extension for JSONEachRow format. [#34496](https://github.com/ClickHouse/ClickHouse/pull/34496) ([Kruglov Pavel](https://github.com/Avogar)). +* Send ProfileEvents statistics in case of INSERT SELECT query. [#34498](https://github.com/ClickHouse/ClickHouse/pull/34498) ([Dmitry Novik](https://github.com/novikd)). +* Added sending of the output format back to client like it's done in HTTP protocol as suggested in [#34362](https://github.com/ClickHouse/ClickHouse/issues/34362). Closes [#34362](https://github.com/ClickHouse/ClickHouse/issues/34362). [#34499](https://github.com/ClickHouse/ClickHouse/pull/34499) ([Vitaly Baranov](https://github.com/vitlibar)). +* Allow to write `s3(url, access_key_id, secret_access_key)`. [#34503](https://github.com/ClickHouse/ClickHouse/pull/34503) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `IF EXISTS` clause for `TTL expr TO [DISK|VOLUME] [IF EXISTS] 'xxx'` feature. Parts will be moved to disk or volume only if it exists on replica, so `MOVE TTL` rules will be able to behave differently on replicas according to the existing storage policies. Resolves [#34455](https://github.com/ClickHouse/ClickHouse/issues/34455). [#34504](https://github.com/ClickHouse/ClickHouse/pull/34504) ([Anton Popov](https://github.com/CurtizJ)). +* Little improvement no need to clone log entry. [#34587](https://github.com/ClickHouse/ClickHouse/pull/34587) ([zhanglistar](https://github.com/zhanglistar)). +* Slightly improve performance in case of filtering by sparse columns (which can be enabled by setting `ratio_of_defaults_for_sparse_serialization` in `MergeTree` tables). [#34601](https://github.com/ClickHouse/ClickHouse/pull/34601) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix +* This PR allows using multiple LDAP storages in the same list of user directories. It worked earlier but was broken because LDAP tests are disabled (they are part of the testflows tests). [#33574](https://github.com/ClickHouse/ClickHouse/pull/33574) ([Vitaly Baranov](https://github.com/vitlibar)). +* This PR fixes [#18206](https://github.com/ClickHouse/ClickHouse/issues/18206). [#33977](https://github.com/ClickHouse/ClickHouse/pull/33977) ([Vitaly Baranov](https://github.com/vitlibar)). +* This PR fixes [#19429](https://github.com/ClickHouse/ClickHouse/issues/19429). [#34225](https://github.com/ClickHouse/ClickHouse/pull/34225) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix inserting to temporary tables via gRPC. This PR fixes [#34347](https://github.com/ClickHouse/ClickHouse/issues/34347), issue `#2`. [#34364](https://github.com/ClickHouse/ClickHouse/pull/34364) ([Vitaly Baranov](https://github.com/vitlibar)). +* add HashMethodSingleLowCardinalityColumn::findKey, avoid crash. [#34506](https://github.com/ClickHouse/ClickHouse/pull/34506) ([DR](https://github.com/freedomDR)). + +#### Build/Testing/Packaging Improvement +* Add action for published releases. [#32218](https://github.com/ClickHouse/ClickHouse/pull/32218) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove unbundled support. [#33690](https://github.com/ClickHouse/ClickHouse/pull/33690) ([Azat Khuzhin](https://github.com/azat)). +* Some improvement over current build system. [#33695](https://github.com/ClickHouse/ClickHouse/pull/33695) ([Amos Bird](https://github.com/amosbird)). +* Removed "Yandex ClickHouse" terms from descriptions. Change to default mirrors for packages. [#33745](https://github.com/ClickHouse/ClickHouse/pull/33745) ([Ilya Yatsishin](https://github.com/qoega)). +* - Fix unconditional `--build-arg FROM_TAG=` docker argument - Add some tests for docker tests/ci. [#33751](https://github.com/ClickHouse/ClickHouse/pull/33751) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Disable dh_update_autotools_config that updates config.guess/config.sub in sources directory on build. It will be more deterministic in terms of different build hosts used as config.guess is provided with sources. [#33752](https://github.com/ClickHouse/ClickHouse/pull/33752) ([Ilya Yatsishin](https://github.com/qoega)). +* clickhouse-test.deb is not used in stateless CI checks. [#33948](https://github.com/ClickHouse/ClickHouse/pull/33948) ([Ilya Yatsishin](https://github.com/qoega)). +* - Add on-demand style-checker-aarch64 hosts - Run dockerpush CI jobs there. [#33954](https://github.com/ClickHouse/ClickHouse/pull/33954) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add back the missing `-Werror` flag globally. This fixes https://github.com/ClickHouse/ClickHouse/pull/33940#issuecomment-1020466537. [#33970](https://github.com/ClickHouse/ClickHouse/pull/33970) ([Amos Bird](https://github.com/amosbird)). +* Separate base parts out of SystemLog. Common code can use system log without fully linking to Interpreters. This helps with errors like https://s3.amazonaws.com/clickhouse-builds/33970/49b229f9c781854861254350d3407f209fb99dfd/binary_splitted/build_log.log. [#33978](https://github.com/ClickHouse/ClickHouse/pull/33978) ([Amos Bird](https://github.com/amosbird)). +* Fix broken dependencies tree building, improve tests. [#33983](https://github.com/ClickHouse/ClickHouse/pull/33983) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Add get_with_retries to a download helper - Use it un PRInfo class - Replace `labels_from_api` by `pr_event_from_api` - Use it in description check to have always actual body and labels. [#34012](https://github.com/ClickHouse/ClickHouse/pull/34012) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Fix style check for tags_stable.yml - Run StyleCheck always in PR - Mark `Run Check` as failed only for some cases. [#34283](https://github.com/ClickHouse/ClickHouse/pull/34283) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add verbosity to a style check. [#34289](https://github.com/ClickHouse/ClickHouse/pull/34289) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix previously wrong OK_SKIP_LABELS in run_check.py. [#34340](https://github.com/ClickHouse/ClickHouse/pull/34340) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Migrate docker images from Moscow timezone to UTC - Update ubuntu version for performance comparison from 18.04 to 20.04. [#34373](https://github.com/ClickHouse/ClickHouse/pull/34373) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Ensure that tests don't depend on the result of non-stable sorting of equal elements. Added equal items ranges randomization in debug after sort to prevent issues when we rely on equal items sort order. [#34393](https://github.com/ClickHouse/ClickHouse/pull/34393) ([Maksim Kita](https://github.com/kitaisreal)). +* - Rebuild docker images on a daily base - Add `--all` flag to rebuild all images - Add `only_amd64` parameter for some images - Revert all workarounds for `only_amd64` images - Fix all broken images. [#34492](https://github.com/ClickHouse/ClickHouse/pull/34492) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix broken Hadoop tests after updated paths in an image. [#34556](https://github.com/ClickHouse/ClickHouse/pull/34556) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* I test compile ClickHouse without HDFS but failed, because the code in DiskHDFS doesn`t judge whether use HDFS. [#34573](https://github.com/ClickHouse/ClickHouse/pull/34573) ([zxealous](https://github.com/zxealous)). +* Set timeout 40 minutes for fast tests. [#34614](https://github.com/ClickHouse/ClickHouse/pull/34614) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Reverts ClickHouse/ClickHouse[#34614](https://github.com/ClickHouse/ClickHouse/issues/34614). [#34622](https://github.com/ClickHouse/ClickHouse/pull/34622) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Rework version_helper, make it executable - Reimplement StorageSystemContributors.sh in version_helper - Create a release script. [#34641](https://github.com/ClickHouse/ClickHouse/pull/34641) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Fix path in workflows/release.yml - To be backported to branch 22.1. [#34646](https://github.com/ClickHouse/ClickHouse/pull/34646) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fix lz4 compression for output. Closes [#31421](https://github.com/ClickHouse/ClickHouse/issues/31421). [#31862](https://github.com/ClickHouse/ClickHouse/pull/31862) ([Kruglov Pavel](https://github.com/Avogar)). +* Create a function escapeForLDAPFilter and use it to escape characters '(' and ')' in a final_user_dn variable. [#33401](https://github.com/ClickHouse/ClickHouse/pull/33401) ([IlyaTsoi](https://github.com/IlyaTsoi)). +* TODO. [#33492](https://github.com/ClickHouse/ClickHouse/pull/33492) ([huzhichengdd](https://github.com/huzhichengdd)). +* Fix error `Bad cast from type ... to DB::DataTypeArray` which may happen when table has `Nested` column with dots in name, and default value is generated for it (e.g. during insert, when column is not listed). Continuation of [#28762](https://github.com/ClickHouse/ClickHouse/issues/28762). [#33588](https://github.com/ClickHouse/ClickHouse/pull/33588) ([Alexey Pavlenko](https://github.com/alexeypavlenko)). +* Fix `Chunk should have AggregatedChunkInfo in GroupingAggregatedTransform` (in case of `optimize_aggregation_in_order=1`). [#33637](https://github.com/ClickHouse/ClickHouse/pull/33637) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug in zero copy replication which lead to data duplication in case of TTL move. Fixes [#33643](https://github.com/ClickHouse/ClickHouse/issues/33643). [#33642](https://github.com/ClickHouse/ClickHouse/pull/33642) ([alesapin](https://github.com/alesapin)). +* Allow some queries with sorting, LIMIT BY, ARRAY JOIN and lambda functions. This closes [#7462](https://github.com/ClickHouse/ClickHouse/issues/7462). [#33675](https://github.com/ClickHouse/ClickHouse/pull/33675) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Correctly determine current database if `CREATE TEMPORARY TABLE AS SELECT` is queried inside a named HTTP session. This is a very rare use case. This closes [#8340](https://github.com/ClickHouse/ClickHouse/issues/8340). [#33676](https://github.com/ClickHouse/ClickHouse/pull/33676) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix mutation when table contains projections. This fixes [#33010](https://github.com/ClickHouse/ClickHouse/issues/33010) . This fixes [#33275](https://github.com/ClickHouse/ClickHouse/issues/33275) . [#33679](https://github.com/ClickHouse/ClickHouse/pull/33679) ([Amos Bird](https://github.com/amosbird)). +* Throw exception when storage hdfs list directory failed. [#33724](https://github.com/ClickHouse/ClickHouse/pull/33724) ([LiuNeng](https://github.com/liuneng1994)). +* Fix tiny race between count() and INSERT/merges/... in MergeTree (it is possible to return incorrect number of rows for SELECT with optimize_trivial_count_query). [#33753](https://github.com/ClickHouse/ClickHouse/pull/33753) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug of check table when creating data part with wide format and projection. [#33774](https://github.com/ClickHouse/ClickHouse/pull/33774) ([李扬](https://github.com/taiyang-li)). +* Fix parsing query INSERT INTO ... VALUES SETTINGS ... (...), ... [#33776](https://github.com/ClickHouse/ClickHouse/pull/33776) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bug in client that led to 'Connection reset by peer' in server. Closes [#33309](https://github.com/ClickHouse/ClickHouse/issues/33309). [#33790](https://github.com/ClickHouse/ClickHouse/pull/33790) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix usage of external dictionaries with `Redis` source and large number of keys. [#33804](https://github.com/ClickHouse/ClickHouse/pull/33804) ([Anton Popov](https://github.com/CurtizJ)). +* Fix schema inference for JSONEachRow and JSONCompactEachRow. [#33830](https://github.com/ClickHouse/ClickHouse/pull/33830) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix KeyCondition with no common types available. [#33833](https://github.com/ClickHouse/ClickHouse/pull/33833) ([Amos Bird](https://github.com/amosbird)). +* Fix memory leak in `clickhouse-keeper` in case of compression is used (default). [#33840](https://github.com/ClickHouse/ClickHouse/pull/33840) ([Azat Khuzhin](https://github.com/azat)). +* Fixed `replica is not readonly` logical error on `SYSTEM RESTORE REPLICA` query when replica is actually readonly. Fixes [#33806](https://github.com/ClickHouse/ClickHouse/issues/33806). [#33847](https://github.com/ClickHouse/ClickHouse/pull/33847) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix usage of sparse columns (which can be enabled by experimental setting `ratio_of_defaults_for_sparse_serialization`). [#33849](https://github.com/ClickHouse/ClickHouse/pull/33849) ([Anton Popov](https://github.com/CurtizJ)). +* Fix crash if sql user defined function is created with lambda with non identifier arguments. Closes [#33866](https://github.com/ClickHouse/ClickHouse/issues/33866). [#33868](https://github.com/ClickHouse/ClickHouse/pull/33868) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix potential race condition when doing remote disk read. cc @Jokser. [#33912](https://github.com/ClickHouse/ClickHouse/pull/33912) ([Amos Bird](https://github.com/amosbird)). +* Aggregate function combinator `-If` did not correctly process `Nullable` filter argument. This closes [#27073](https://github.com/ClickHouse/ClickHouse/issues/27073). [#33920](https://github.com/ClickHouse/ClickHouse/pull/33920) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix parsing ZK metadata: now metadata from zookeeper compared with local metadata in canonical form. [#33933](https://github.com/ClickHouse/ClickHouse/pull/33933) ([sunny](https://github.com/sunny19930321)). +* Fix usage of functions `array` and `tuple` with literal arguments in distributed queries. Previously it could lead to `Not found columns` exception. [#33938](https://github.com/ClickHouse/ClickHouse/pull/33938) ([Anton Popov](https://github.com/CurtizJ)). +* Fix crash while reading of nested tuples. Fixes [#33838](https://github.com/ClickHouse/ClickHouse/issues/33838). [#33956](https://github.com/ClickHouse/ClickHouse/pull/33956) ([Anton Popov](https://github.com/CurtizJ)). +* Fix parsing IPv6 from query parameter and fix IPv6 to string conversion. Closes [#33928](https://github.com/ClickHouse/ClickHouse/issues/33928). [#33971](https://github.com/ClickHouse/ClickHouse/pull/33971) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix segfault while parsing ORC file with corrupted footer. Closes [#33797](https://github.com/ClickHouse/ClickHouse/issues/33797). [#33984](https://github.com/ClickHouse/ClickHouse/pull/33984) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bug which lead to inability for server to start when both replicated access storage and keeper are used. Introduced two settings for keeper socket timeout instead of settings from default user: `keeper_server.socket_receive_timeout_sec` and `keeper_server.socket_send_timeout_sec`. Fixes [#33973](https://github.com/ClickHouse/ClickHouse/issues/33973). [#33988](https://github.com/ClickHouse/ClickHouse/pull/33988) ([alesapin](https://github.com/alesapin)). +* - Fixes `parallel_view_processing=0` not working when inserting into a table using `VALUES`. - Fixes `view_duration_ms` in the `query_views_log` not being set correctly for materialized views. [#34067](https://github.com/ClickHouse/ClickHouse/pull/34067) ([Raúl Marín](https://github.com/Algunenano)). +* Fix asynchronous inserts with `Native` format. [#34068](https://github.com/ClickHouse/ClickHouse/pull/34068) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed minor race condition that might cause "intersecting parts" error in extremely rare cases after ZooKeeper connection loss. [#34096](https://github.com/ClickHouse/ClickHouse/pull/34096) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix possible data race in StorageFile that was introduced in https://github.com/ClickHouse/ClickHouse/pull/33960. Closes [#34111](https://github.com/ClickHouse/ClickHouse/issues/34111). [#34113](https://github.com/ClickHouse/ClickHouse/pull/34113) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix inserts to distributed tables in case of change of native protocol. The last change was in the version version 22.1, so there may be some failures of inserts to distributed tables after upgrade to that version. [#34132](https://github.com/ClickHouse/ClickHouse/pull/34132) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bug which can rarely lead to error "Cannot read all data" while reading LowCardinality columns of MergeTree table engines family which stores data on remote file system like S3. [#34139](https://github.com/ClickHouse/ClickHouse/pull/34139) ([alesapin](https://github.com/alesapin)). +* Fix rare and benign race condition in `HDFS`, `S3` and `URL` storage engines which can lead to additional connections. [#34172](https://github.com/ClickHouse/ClickHouse/pull/34172) ([alesapin](https://github.com/alesapin)). +* Fix schema inference for table runction s3. [#34186](https://github.com/ClickHouse/ClickHouse/pull/34186) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix metric `Query`, which shows number of executing queries. In last several releases it was always 0. [#34224](https://github.com/ClickHouse/ClickHouse/pull/34224) ([Anton Popov](https://github.com/CurtizJ)). +* Fix reading of subcolumns with dots in their names. In particular fixed reading of `Nested` columns, if their element names contain dots (e.g ```Nested(`keys.name` String, `keys.id` UInt64, values UInt64)```). [#34228](https://github.com/ClickHouse/ClickHouse/pull/34228) ([Anton Popov](https://github.com/CurtizJ)). +* Fix memory leak in case of some Exception during query processing with `optimize_aggregation_in_order=1`. [#34234](https://github.com/ClickHouse/ClickHouse/pull/34234) ([Azat Khuzhin](https://github.com/azat)). +* Fix current_user/current_address for interserver mode (Before this patch current_user/current_address will be preserved from the previous query). [#34263](https://github.com/ClickHouse/ClickHouse/pull/34263) ([Azat Khuzhin](https://github.com/azat)). +* Fix progress bar width. It was incorrectly rounded to integer number of characters. [#34275](https://github.com/ClickHouse/ClickHouse/pull/34275) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed a couple of extremely rare race conditions that might lead to broken state of replication queue and "intersecting parts" error. [#34297](https://github.com/ClickHouse/ClickHouse/pull/34297) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix various issues when projection is enabled by default. Each issue is described in separate commit. This is for [#33678](https://github.com/ClickHouse/ClickHouse/issues/33678) . This fixes [#34273](https://github.com/ClickHouse/ClickHouse/issues/34273). [#34305](https://github.com/ClickHouse/ClickHouse/pull/34305) ([Amos Bird](https://github.com/amosbird)). +* Try to fix rare bug while reading of empty arrays, which could lead to `Data compressed with different methods` error. [#34327](https://github.com/ClickHouse/ClickHouse/pull/34327) ([Anton Popov](https://github.com/CurtizJ)). +* Fix wrong engine syntax in result of `SHOW CREATE DATABASE` query for databases with engine `Memory`. This closes [#34335](https://github.com/ClickHouse/ClickHouse/issues/34335). [#34345](https://github.com/ClickHouse/ClickHouse/pull/34345) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* For SQLUserDefinedFunctions change privilege level from DATABASE to GLOBAL. Closes [#34281](https://github.com/ClickHouse/ClickHouse/issues/34281). [#34404](https://github.com/ClickHouse/ClickHouse/pull/34404) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix segfault in schema inference from url. Closes [#34147](https://github.com/ClickHouse/ClickHouse/issues/34147). [#34405](https://github.com/ClickHouse/ClickHouse/pull/34405) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible error 'Cannot convert column Function to mask' in short circuit function evaluation. Closes [#34171](https://github.com/ClickHouse/ClickHouse/issues/34171). [#34415](https://github.com/ClickHouse/ClickHouse/pull/34415) ([Kruglov Pavel](https://github.com/Avogar)). +* Add missing lock for storage. Fixes possible race with table deletion. [#34416](https://github.com/ClickHouse/ClickHouse/pull/34416) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible error 'file_size: Operation not supported'. [#34479](https://github.com/ClickHouse/ClickHouse/pull/34479) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix compression support in URL engine. [#34524](https://github.com/ClickHouse/ClickHouse/pull/34524) ([Frank Chen](https://github.com/FrankChen021)). +* Fix comparison between integers and floats in index analysis. Previously it could lead to skipping some granules for reading by mistake. Fixes [#34493](https://github.com/ClickHouse/ClickHouse/issues/34493). [#34528](https://github.com/ClickHouse/ClickHouse/pull/34528) ([Anton Popov](https://github.com/CurtizJ)). +* Fix exception `Chunk should have AggregatedChunkInfo in MergingAggregatedTransform` (in case of `optimize_aggregation_in_order=1` and `distributed_aggregation_memory_efficient=0`). Fixes [#34526](https://github.com/ClickHouse/ClickHouse/issues/34526). [#34532](https://github.com/ClickHouse/ClickHouse/pull/34532) ([Anton Popov](https://github.com/CurtizJ)). +* In case of cancelation S3 and HDFS canceled only current reader, but continued to execute the initial query. Fixes [#34301](https://github.com/ClickHouse/ClickHouse/issues/34301) Relates to [#34397](https://github.com/ClickHouse/ClickHouse/issues/34397). [#34539](https://github.com/ClickHouse/ClickHouse/pull/34539) ([Dmitry Novik](https://github.com/novikd)). +* Fix bug of round/roundBankers, close [#33267](https://github.com/ClickHouse/ClickHouse/issues/33267). [#34562](https://github.com/ClickHouse/ClickHouse/pull/34562) ([李扬](https://github.com/taiyang-li)). +* Fixed the assertion in case of using `allow_experimental_parallel_reading_from_replicas` with `max_parallel_replicas` equals to 1. This fixes [#34525](https://github.com/ClickHouse/ClickHouse/issues/34525). [#34613](https://github.com/ClickHouse/ClickHouse/pull/34613) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* - Add Debug workflow to get variables for all actions on demand - Fix lack of pr_info.number for some edge case. [#34644](https://github.com/ClickHouse/ClickHouse/pull/34644) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Additionally check remote_fs_execute_merges_on_single_replica_time_threshold inside ReplicatedMergeTreeQueue"'. [#34201](https://github.com/ClickHouse/ClickHouse/pull/34201) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add func tests run with s3"'. [#34211](https://github.com/ClickHouse/ClickHouse/pull/34211) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add pool to WriteBufferFromS3"'. [#34212](https://github.com/ClickHouse/ClickHouse/pull/34212) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Add support agreement page and snippets.'. [#34512](https://github.com/ClickHouse/ClickHouse/pull/34512) ([Tom Risse](https://github.com/flickerbox-tom)). +* NO CL ENTRY: 'Add Gigasheet to adopters'. [#34589](https://github.com/ClickHouse/ClickHouse/pull/34589) ([Brian Hunter](https://github.com/bjhunter)). + +#### NO CL CATEGORY + +* Reverting to previous docker images, will take a closer look at failing tests from [#34373](https://github.com/ClickHouse/ClickHouse/issues/34373). [#34413](https://github.com/ClickHouse/ClickHouse/pull/34413) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v22.2.2.1-stable.md b/docs/changelogs/v22.2.2.1-stable.md new file mode 100644 index 00000000000..c9158290753 --- /dev/null +++ b/docs/changelogs/v22.2.2.1-stable.md @@ -0,0 +1,2 @@ +### ClickHouse release v22.2.2.1-stable FIXME as compared to v22.2.1.2139-prestable + diff --git a/docs/changelogs/v22.2.3.5-stable.md b/docs/changelogs/v22.2.3.5-stable.md new file mode 100644 index 00000000000..90c0d22d570 --- /dev/null +++ b/docs/changelogs/v22.2.3.5-stable.md @@ -0,0 +1,6 @@ +### ClickHouse release v22.2.3.5-stable FIXME as compared to v22.2.2.1-stable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#34848](https://github.com/ClickHouse/ClickHouse/issues/34848): Fix possible failures in S2 functions when queries contain const columns. [#34745](https://github.com/ClickHouse/ClickHouse/pull/34745) ([Bharat Nallan](https://github.com/bharatnc)). + diff --git a/docs/changelogs/v22.3.1.1262-prestable.md b/docs/changelogs/v22.3.1.1262-prestable.md new file mode 100644 index 00000000000..f47afd67021 --- /dev/null +++ b/docs/changelogs/v22.3.1.1262-prestable.md @@ -0,0 +1,146 @@ +### ClickHouse release v22.3.1.1262-prestable FIXME as compared to v22.2.1.2139-prestable + +#### Backward Incompatible Change +* Improvement the toDatetime function overflows. When the date string is very large, it will be converted to 1970. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)). +* Make arrayCompact behave as other higher-order functions: perform compaction not of lambda function results but on original array. If you using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping arrayCompact arguments into arrayMap. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)). + +#### New Feature +* New data type `Object()`, which supports storing of semi-structured data (for now JSON only). Data is written to such types as string. Then all paths are extracted according to format of semi-structured data and written as separate columns in most optimal types, that can store all their values. Those columns can be queried by names that match paths in source data. E.g `data.key1.key2` or with cast operator `data.key1.key2::Int64`. [#23932](https://github.com/ClickHouse/ClickHouse/pull/23932) ([Anton Popov](https://github.com/CurtizJ)). +* Support authentication of users connected via SSL by their X.509 certificate. [#31484](https://github.com/ClickHouse/ClickHouse/pull/31484) ([eungenue](https://github.com/eungenue)). +* related to issue: [#30715](https://github.com/ClickHouse/ClickHouse/issues/30715). Add three functions for map data type: 1. mapReplace(map1, map2) - replaces values for keys in map1 with the values of the corresponding keys in map2; adds keys from map2 that don't exist in map1. 2. mapFilter 3. mapMap mapFilter and mapMap are higher order functions , accept two arguments, first argument is a lambda function with k, v pair , the second argument is a map type column. [#33698](https://github.com/ClickHouse/ClickHouse/pull/33698) ([hexiaoting](https://github.com/hexiaoting)). +* Add local cache for disk s3. Closes [#28961](https://github.com/ClickHouse/ClickHouse/issues/28961). [#33717](https://github.com/ClickHouse/ClickHouse/pull/33717) ([Kseniia Sumarokova](https://github.com/kssenii)). +* - Add startsWith & endsWith function for arrays, closes [#33982](https://github.com/ClickHouse/ClickHouse/issues/33982). [#34368](https://github.com/ClickHouse/ClickHouse/pull/34368) ([usurai](https://github.com/usurai)). +* Implement DateTime64 transform from and to arrow column, which closes [#8280](https://github.com/ClickHouse/ClickHouse/issues/8280) and closes [#28574](https://github.com/ClickHouse/ClickHouse/issues/28574). [#34561](https://github.com/ClickHouse/ClickHouse/pull/34561) ([李扬](https://github.com/taiyang-li)). +* Add cpu/mem metric for clickhouse-local. Close [#34545](https://github.com/ClickHouse/ClickHouse/issues/34545). [#34605](https://github.com/ClickHouse/ClickHouse/pull/34605) ([李扬](https://github.com/taiyang-li)). +* Support schema inference for inserting into table functions file/hdfs/s3/url. [#34732](https://github.com/ClickHouse/ClickHouse/pull/34732) ([Kruglov Pavel](https://github.com/Avogar)). +* A new settings called is added in server configuration which on/off insecure AUTH_TYPE plaintext-password and no_password. By default the property is set to true which means authType Plaintext_password & NO_password is allowed. [#34738](https://github.com/ClickHouse/ClickHouse/pull/34738) ([Heena Bansal](https://github.com/HeenaBansal2009)). +* Add new table function `hive`, usage as follow ``` hive('', '', '', '', '') ``` for example ``` SELECT * FROM hive('thrift://hivetest:9083', 'test', 'demo', '`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)', 'day') ```. [#34946](https://github.com/ClickHouse/ClickHouse/pull/34946) ([lgbo](https://github.com/lgbo-ustc)). +* - When use clickhouse-client logining, If user and password is not specified in command line or configuration file, get them from `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables. Close [#34538](https://github.com/ClickHouse/ClickHouse/issues/34538). [#34947](https://github.com/ClickHouse/ClickHouse/pull/34947) ([DR](https://github.com/freedomDR)). +* Added date_time_input_format = 'best_effort_us'. Closes [#34799](https://github.com/ClickHouse/ClickHouse/issues/34799). [#34982](https://github.com/ClickHouse/ClickHouse/pull/34982) ([WenYao](https://github.com/Cai-Yao)). +* Changed the Play UI to select a theme by the following priority: * 'theme' GET parameter * 'theme' in localStorage * According to OS preference (didn't work before). [#35068](https://github.com/ClickHouse/ClickHouse/pull/35068) ([peledni](https://github.com/peledni)). +* ``` sql ) explain ast graph = 1 select * from system.parts;. [#35173](https://github.com/ClickHouse/ClickHouse/pull/35173) ([李扬](https://github.com/taiyang-li)). +* Add `database_replicated_allow_only_replicated_engine` setting. When enabled, it only allowed to create `Replicated` tables in `Replicated` database. [#35214](https://github.com/ClickHouse/ClickHouse/pull/35214) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Performance Improvement +* Calling std::distance on large list will decrease performance, use a version in Node to substitute with more memory 80MB/1000w Nodes. The logic is: Every node is list has a version with type size_t, setting in insert or insertOrReplace method using the class member variable current_version. The version is only increase at enableSnapshot method. When doing a snapshot, call snapshotSizeWithVersion to get snapshot size and version(snapshot_up_to_version). When traversing the list, if node version is less then or equal to snapshot_up_to_version, then protects it from deleting if node version is bigger than snapshot_up_to_version, we can do anything to it. [#34486](https://github.com/ClickHouse/ClickHouse/pull/34486) ([zhanglistar](https://github.com/zhanglistar)). +* Compaction of log store in Nuraft need acquire an inner lock which also used in normal commit process, so we delete useless logs in `compact` method of Changelog class in a background thread. See details on: https://github.com/ClickHouse-Extras/NuRaft/blob/1707a7572aa66ec5d0a2dbe2bf5effa3352e6b2d/src/handle_commit.cxx#L560. [#34534](https://github.com/ClickHouse/ClickHouse/pull/34534) ([zhanglistar](https://github.com/zhanglistar)). +* Don't hold the latest snapshot in memory, instead, reading the snapshot if needed, sequence reading is fast to 200+MBps even on HDD using mmap system call. Writing snapshot data directly to disk using compression method without holding original data and compressed data in memory. [#34584](https://github.com/ClickHouse/ClickHouse/pull/34584) ([zhanglistar](https://github.com/zhanglistar)). +* MergeTree improve insert performance replacing std::stable_sort with pdqsort. [#34750](https://github.com/ClickHouse/ClickHouse/pull/34750) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve the performance of the `ANY` aggregation function by acting over batches. [#34760](https://github.com/ClickHouse/ClickHouse/pull/34760) ([Raúl Marín](https://github.com/Algunenano)). +* Improve performance of `detectCharset `, `detectLanguageUnknown ` functions. Improve performance of `DirectDictionary` if dictionary source is `ClickHouse`. Improve performance of processing queries with large `IN` section. [#34888](https://github.com/ClickHouse/ClickHouse/pull/34888) ([Maksim Kita](https://github.com/kitaisreal)). +* Less lock on connection using atomic stat. Notice that it is an approximate stat. [#35010](https://github.com/ClickHouse/ClickHouse/pull/35010) ([zhanglistar](https://github.com/zhanglistar)). + +#### Improvement +* Make the znode ctime and mtime consistent between servers. [#33441](https://github.com/ClickHouse/ClickHouse/pull/33441) ([小路](https://github.com/nicelulu)). +* Hold time lock while assigning tasks to clear old temporary directories in StorageMergeTree. [#34025](https://github.com/ClickHouse/ClickHouse/pull/34025) ([Amos Bird](https://github.com/amosbird)). +* When large files were written with `s3` table function or table engine, the content type on the files was mistakenly set to `application/xml` due to a bug in the AWS SDK. This closes [#33964](https://github.com/ClickHouse/ClickHouse/issues/33964). [#34433](https://github.com/ClickHouse/ClickHouse/pull/34433) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve schema inference with globs in FIle/S3/HDFS/URL engines. Try to use the next path for schema inference in case of error. [#34465](https://github.com/ClickHouse/ClickHouse/pull/34465) ([Kruglov Pavel](https://github.com/Avogar)). +* - Improve the opentelemetry span logs for INSERT operation on distributed table. [#34480](https://github.com/ClickHouse/ClickHouse/pull/34480) ([Frank Chen](https://github.com/FrankChen021)). +* MaterializedMySQL support materialized_mysql_tables_list(a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. Default value: empty list — means whole tables will be replicated) settings, mentioned at [#32977](https://github.com/ClickHouse/ClickHouse/issues/32977). [#34487](https://github.com/ClickHouse/ClickHouse/pull/34487) ([zzsmdfj](https://github.com/zzsmdfj)). +* This PR changes restrictive row policies a bit to make them an easier alternative to permissive policies in easy cases. If for a particular table only restrictive policies exist (without permissive policies) users will be able to see some rows. Also `SHOW CREATE ROW POLICY` will always show `AS permissive` or `AS restrictive` in row policy's definition. [#34596](https://github.com/ClickHouse/ClickHouse/pull/34596) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add `encodeURLComponent`, 'encodeURLFormComponent' function. Closes [#31092](https://github.com/ClickHouse/ClickHouse/issues/31092). [#34607](https://github.com/ClickHouse/ClickHouse/pull/34607) ([zzsmdfj](https://github.com/zzsmdfj)). +* Now you can read `system.zookeeper` table without restrictions on path or using `like` expression. This reads can generate quite heavy load for zookeeper so to enable this ability you have to enable setting `allow_unrestricted_reads_from_keeper`. [#34609](https://github.com/ClickHouse/ClickHouse/pull/34609) ([Sergei Trifonov](https://github.com/serxa)). +* Some refactoring and improvement over async and remote buffer related stuff. Separated in each commit. [#34629](https://github.com/ClickHouse/ClickHouse/pull/34629) ([Amos Bird](https://github.com/amosbird)). +* ExecutableUserDefinedFunctions allow to specify argument names. This is necessary for formats where argument name is part of serialization, like `Native`, `JSONEachRow`. Closes [#34604](https://github.com/ClickHouse/ClickHouse/issues/34604). [#34653](https://github.com/ClickHouse/ClickHouse/pull/34653) ([Maksim Kita](https://github.com/kitaisreal)). +* Extract schema only once on table creation and prevent reading from local files/external sources to extract schema on each server startup. [#34684](https://github.com/ClickHouse/ClickHouse/pull/34684) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not reset logging that configured via --log-file/--errorlog-file in case of empty logger.log/logger.errorlog. [#34718](https://github.com/ClickHouse/ClickHouse/pull/34718) ([Amos Bird](https://github.com/amosbird)). +* Support `remote()`/`cluster()` for `parallel_distributed_insert_select=2`. [#34728](https://github.com/ClickHouse/ClickHouse/pull/34728) ([Azat Khuzhin](https://github.com/azat)). +* Add name hints for data skipping indices. Closes [#29698](https://github.com/ClickHouse/ClickHouse/issues/29698). [#34764](https://github.com/ClickHouse/ClickHouse/pull/34764) ([flynn](https://github.com/ucasfl)). +* Now `ALTER TABLE DROP COLUMN columnX` queries for `MergeTree` table engines will work instantly when `columnX` is `ALIAS` column. Fixes [#34660](https://github.com/ClickHouse/ClickHouse/issues/34660). [#34786](https://github.com/ClickHouse/ClickHouse/pull/34786) ([alesapin](https://github.com/alesapin)). +* In previous versions the progress bar in clickhouse-client can jump forward near 50% for no reason. This closes [#34324](https://github.com/ClickHouse/ClickHouse/issues/34324). [#34801](https://github.com/ClickHouse/ClickHouse/pull/34801) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix reading only columns which user asked for. Closes [#34163](https://github.com/ClickHouse/ClickHouse/issues/34163). [#34849](https://github.com/ClickHouse/ClickHouse/pull/34849) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Implement MemoryStatisticsOS for FreeBSD. [#34902](https://github.com/ClickHouse/ClickHouse/pull/34902) ([Alexandre Snarskii](https://github.com/snar)). +* Allow to open empty sqlite db file if it does not exist. Closes [#33367](https://github.com/ClickHouse/ClickHouse/issues/33367). [#34907](https://github.com/ClickHouse/ClickHouse/pull/34907) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow LowCardinality strings for ngrambf_v1/tokenbf_v1 indexes. Closes [#21865](https://github.com/ClickHouse/ClickHouse/issues/21865). [#34911](https://github.com/ClickHouse/ClickHouse/pull/34911) ([Lars Hiller Eidnes](https://github.com/larspars)). +* Ignore per-column `TTL` in `CREATE TABLE AS` if new table engine does not support it (i.e. if the engine is not of `MergeTree` family). [#34938](https://github.com/ClickHouse/ClickHouse/pull/34938) ([Azat Khuzhin](https://github.com/azat)). +* Use connection pool for hive metastore client. [#34940](https://github.com/ClickHouse/ClickHouse/pull/34940) ([lgbo](https://github.com/lgbo-ustc)). +* Currently, if the user changes the settings of the system tables there will be tons of logs and ClickHouse will rename the tables every minute. This fixes [#34929](https://github.com/ClickHouse/ClickHouse/issues/34929). [#34949](https://github.com/ClickHouse/ClickHouse/pull/34949) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* remove unnecessary columns for reading parquet/orc files. [#34954](https://github.com/ClickHouse/ClickHouse/pull/34954) ([lgbo](https://github.com/lgbo-ustc)). +* For random access readbuffer in hive, the first time to read the readbuffer would use the original readbuffer instead of local file. When we read a parquet/orc format file, the readbuffer seeks to the end of the file, which will be blocked until the local file finishes download, and make the whold process slow. [#34957](https://github.com/ClickHouse/ClickHouse/pull/34957) ([lgbo](https://github.com/lgbo-ustc)). +* Add more sanity checks for keeper configuration: now mixing of localhost and non-local servers is not allowed, also add checks for same value of internal raft port and keeper client port. [#35004](https://github.com/ClickHouse/ClickHouse/pull/35004) ([alesapin](https://github.com/alesapin)). +* Functions `dictGetHierarchy`, `dictIsIn`, `dictGetChildren`, `dictGetDescendants` support implicit key cast and constant arguments. Closes [#34970](https://github.com/ClickHouse/ClickHouse/issues/34970). [#35027](https://github.com/ClickHouse/ClickHouse/pull/35027) ([Maksim Kita](https://github.com/kitaisreal)). +* Avoid division by zero in Query Profiler if Linux kernel has a bug. Closes [#34787](https://github.com/ClickHouse/ClickHouse/issues/34787). [#35032](https://github.com/ClickHouse/ClickHouse/pull/35032) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid possible `MEMORY_LIMIT_EXCEEDED` during `INSERT` into `Buffer` with `AggregateFunction`. [#35072](https://github.com/ClickHouse/ClickHouse/pull/35072) ([Azat Khuzhin](https://github.com/azat)). +* Support `view()` for `parallel_distributed_insert_select`. [#35132](https://github.com/ClickHouse/ClickHouse/pull/35132) ([Azat Khuzhin](https://github.com/azat)). +* Add setting to lower column case when reading parquet/ORC file. [#35145](https://github.com/ClickHouse/ClickHouse/pull/35145) ([shuchaome](https://github.com/shuchaome)). +* Do not retry non-rertiable errors. Closes [#35161](https://github.com/ClickHouse/ClickHouse/issues/35161). [#35172](https://github.com/ClickHouse/ClickHouse/pull/35172) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added disk_name to system.part_log. [#35178](https://github.com/ClickHouse/ClickHouse/pull/35178) ([Artyom Yurkov](https://github.com/Varinara)). +* Currently,Clickhouse validates hosts defined under for URL and Remote Table functions. This PR extends the RemoteHostFilter to Mysql and PostgreSQL table functions. [#35191](https://github.com/ClickHouse/ClickHouse/pull/35191) ([Heena Bansal](https://github.com/HeenaBansal2009)). +* Sometimes it is not enough for us to distinguish queries hierachy only by is_initial_query in system.query_log and system.processes. So distributed_depth is needed. [#35207](https://github.com/ClickHouse/ClickHouse/pull/35207) ([李扬](https://github.com/taiyang-li)). +* Support test mode for clickhouse-local. [#35264](https://github.com/ClickHouse/ClickHouse/pull/35264) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Return const for function getMacro if not in distributed query. Close [#34727](https://github.com/ClickHouse/ClickHouse/issues/34727). [#35289](https://github.com/ClickHouse/ClickHouse/pull/35289) ([李扬](https://github.com/taiyang-li)). +* Reload `remote_url_allow_hosts` after config update. [#35294](https://github.com/ClickHouse/ClickHouse/pull/35294) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Bug Fix +* Ignore obsolete grants in ATTACH GRANT statements. This PR fixes [#34815](https://github.com/ClickHouse/ClickHouse/issues/34815). [#34855](https://github.com/ClickHouse/ClickHouse/pull/34855) ([Vitaly Baranov](https://github.com/vitlibar)). +* When the inner readbuffer's buffer size is too small, NEED_MORE_INPUT in `HadoopSnappyDecoder` will run multi times (>=3)for one compressed block. This makes the input data be copied into the wrong place in `HadoopSnappyDecoder::buffer`. [#35116](https://github.com/ClickHouse/ClickHouse/pull/35116) ([lgbo](https://github.com/lgbo-ustc)). + +#### Build/Testing/Packaging Improvement +* Randomize some settings in functional tests. This closes [#32268](https://github.com/ClickHouse/ClickHouse/issues/32268). [#34092](https://github.com/ClickHouse/ClickHouse/pull/34092) ([Kruglov Pavel](https://github.com/Avogar)). +* NA. [#34513](https://github.com/ClickHouse/ClickHouse/pull/34513) ([vzakaznikov](https://github.com/vzakaznikov)). +* Debian package clickhouse-test.deb removed completely. CI use tests from repository and standalone testing via deb package is no longer supported. [#34606](https://github.com/ClickHouse/ClickHouse/pull/34606) ([Ilya Yatsishin](https://github.com/qoega)). +* Set timeout 40 minutes for fast tests. [#34624](https://github.com/ClickHouse/ClickHouse/pull/34624) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Drop PVS test from CI. [#34680](https://github.com/ClickHouse/ClickHouse/pull/34680) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Limit DWARF version for debug info by 4 max, because our internal stack symbolizer cannot parse DWARF version 5. This makes sense if you compile ClickHouse with clang-15. [#34777](https://github.com/ClickHouse/ClickHouse/pull/34777) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve CI scripts arguments. [#34792](https://github.com/ClickHouse/ClickHouse/pull/34792) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use @robot-clickhouse as an author and committer for PRs like https://github.com/ClickHouse/ClickHouse/pull/34685. [#34793](https://github.com/ClickHouse/ClickHouse/pull/34793) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Separate smaller clickhouse-keeper build. [#35031](https://github.com/ClickHouse/ClickHouse/pull/35031) ([alesapin](https://github.com/alesapin)). +* Clion has the following problems "The breakpoint will not currently be hit. No executable code is associated with this line". [#35179](https://github.com/ClickHouse/ClickHouse/pull/35179) ([小路](https://github.com/nicelulu)). +* Add an ability to build stripped binaries with cmake. [#35196](https://github.com/ClickHouse/ClickHouse/pull/35196) ([alesapin](https://github.com/alesapin)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fix distributed subquery max_query_size limitation inconsistency. [#34078](https://github.com/ClickHouse/ClickHouse/pull/34078) ([Chao Ma](https://github.com/godliness)). +* Fix incorrect trivial count result when part movement feature is used [#34089](https://github.com/ClickHouse/ClickHouse/issues/34089). [#34385](https://github.com/ClickHouse/ClickHouse/pull/34385) ([nvartolomei](https://github.com/nvartolomei)). +* Stop to select part for mutate when the other replica has already updated the /log for ReplatedMergeTree engine. [#34633](https://github.com/ClickHouse/ClickHouse/pull/34633) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Fix `allow_experimental_projection_optimization` with `enable_global_with_statement` (before it may lead to `Stack size too large` error in case of multiple expressions in `WITH` clause, and also it executes scalar subqueries again and again, so not it will be more optimal). [#34650](https://github.com/ClickHouse/ClickHouse/pull/34650) ([Azat Khuzhin](https://github.com/azat)). +* Fix serialization/printing for system queries `RELOAD MODEL`, `RELOAD FUNCTION`, `RESTART DISK` when used `ON CLUSTER`. Closes [#34514](https://github.com/ClickHouse/ClickHouse/issues/34514). [#34696](https://github.com/ClickHouse/ClickHouse/pull/34696) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix ENOENT with fsync_part_directory and Vertical merge. [#34739](https://github.com/ClickHouse/ClickHouse/pull/34739) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug for h3 funcs containing const columns which cause queries to fail. [#34743](https://github.com/ClickHouse/ClickHouse/pull/34743) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix possible failures in S2 functions when queries contain const columns. [#34745](https://github.com/ClickHouse/ClickHouse/pull/34745) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix bugs for multiple columns group by in WindowView. [#34859](https://github.com/ClickHouse/ClickHouse/pull/34859) ([vxider](https://github.com/Vxider)). +* Support DDLs like CREATE USER to be executed on cross replicated cluster. [#34860](https://github.com/ClickHouse/ClickHouse/pull/34860) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Fix asynchronous inserts to table functions. Fixes [#34864](https://github.com/ClickHouse/ClickHouse/issues/34864). [#34866](https://github.com/ClickHouse/ClickHouse/pull/34866) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible "Part directory doesn't exist" during `INSERT`. [#34876](https://github.com/ClickHouse/ClickHouse/pull/34876) ([Azat Khuzhin](https://github.com/azat)). +* Fix postgres datetime64 conversion. Closes [#33364](https://github.com/ClickHouse/ClickHouse/issues/33364). [#34910](https://github.com/ClickHouse/ClickHouse/pull/34910) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Avoid busy polling in keeper while searching for changelog files to delete. [#34931](https://github.com/ClickHouse/ClickHouse/pull/34931) ([Azat Khuzhin](https://github.com/azat)). +* Unexpected result when use `in` in `where` in hive query. [#34945](https://github.com/ClickHouse/ClickHouse/pull/34945) ([lgbo](https://github.com/lgbo-ustc)). +* Fix wrong schema inference for unquoted dates in CSV. Closes [#34768](https://github.com/ClickHouse/ClickHouse/issues/34768). [#34961](https://github.com/ClickHouse/ClickHouse/pull/34961) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible rare error `Cannot push block to port which already has data`. Avoid pushing to port with data inside `DelayedSource`. [#34993](https://github.com/ClickHouse/ClickHouse/pull/34993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible segfault in filelog. Closes [#30749](https://github.com/ClickHouse/ClickHouse/issues/30749). [#34996](https://github.com/ClickHouse/ClickHouse/pull/34996) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix unexpected result when use -state type aggregate function in window frame. [#34999](https://github.com/ClickHouse/ClickHouse/pull/34999) ([metahys](https://github.com/metahys)). +* Fix possible exception `Reading for MergeTree family tables must be done with last position boundary`. Closes [#34979](https://github.com/ClickHouse/ClickHouse/issues/34979). [#35001](https://github.com/ClickHouse/ClickHouse/pull/35001) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix reading from `system.asynchronous_inserts` table if there exists asynchronous insert into table function. [#35050](https://github.com/ClickHouse/ClickHouse/pull/35050) ([Anton Popov](https://github.com/CurtizJ)). +* Fix missing alias after function is optimized to subcolumn when setting `optimize_functions_to_subcolumns` is enabled. Closes [#33798](https://github.com/ClickHouse/ClickHouse/issues/33798). [#35079](https://github.com/ClickHouse/ClickHouse/pull/35079) ([qieqieplus](https://github.com/qieqieplus)). +* Avoid possible deadlock on server shutdown. [#35081](https://github.com/ClickHouse/ClickHouse/pull/35081) ([Azat Khuzhin](https://github.com/azat)). +* Fixed the "update_lag" external dictionary configuration option being unusable with the error message ``Unexpected key `update_lag` in dictionary source configuration``. [#35089](https://github.com/ClickHouse/ClickHouse/pull/35089) ([Jason Chu](https://github.com/1lann)). +* fix issue: [#31469](https://github.com/ClickHouse/ClickHouse/issues/31469). [#35118](https://github.com/ClickHouse/ClickHouse/pull/35118) ([zzsmdfj](https://github.com/zzsmdfj)). +* Fix `optimize_skip_unused_shards_rewrite_in` for signed columns and negative values. [#35134](https://github.com/ClickHouse/ClickHouse/pull/35134) ([Azat Khuzhin](https://github.com/azat)). +* Fixed the incorrect translation YAML config to XML. [#35135](https://github.com/ClickHouse/ClickHouse/pull/35135) ([Miel Donkers](https://github.com/mdonkers)). +* Fix partition pruning error when non-monotonic function is used with IN operator. This fixes [#35136](https://github.com/ClickHouse/ClickHouse/issues/35136). [#35146](https://github.com/ClickHouse/ClickHouse/pull/35146) ([Amos Bird](https://github.com/amosbird)). +* Fix materialised postrgesql adding new table to replication (ATTACH TABLE) after manually removing (DETACH TABLE). Closes [#33800](https://github.com/ClickHouse/ClickHouse/issues/33800). Closes [#34922](https://github.com/ClickHouse/ClickHouse/issues/34922). Closes [#34315](https://github.com/ClickHouse/ClickHouse/issues/34315). [#35158](https://github.com/ClickHouse/ClickHouse/pull/35158) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix materialised postgres `table overrides` for partition by, etc. Closes [#35048](https://github.com/ClickHouse/ClickHouse/issues/35048). [#35162](https://github.com/ClickHouse/ClickHouse/pull/35162) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Schema inference didn't work properly on case of `INSERT INTO FUNCTION s3(...) FROM ...`, it tried to read schema from s3 file instead of from select query. [#35176](https://github.com/ClickHouse/ClickHouse/pull/35176) ([Kruglov Pavel](https://github.com/Avogar)). +* - Fix `replaceRegexpAll`, close [#35117](https://github.com/ClickHouse/ClickHouse/issues/35117). [#35182](https://github.com/ClickHouse/ClickHouse/pull/35182) ([Vladimir C](https://github.com/vdimir)). +* Fix error in query with `WITH TOTALS` in case if `HAVING` returned empty result. This fixes [#33711](https://github.com/ClickHouse/ClickHouse/issues/33711). [#35186](https://github.com/ClickHouse/ClickHouse/pull/35186) ([Amos Bird](https://github.com/amosbird)). +* * Fix reading port from config, close [#34776](https://github.com/ClickHouse/ClickHouse/issues/34776). [#35193](https://github.com/ClickHouse/ClickHouse/pull/35193) ([Vladimir C](https://github.com/vdimir)). +* Make function `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave same as `toIPv4`, `toIPv6` functions. Changed behavior of incorrect IP address passed into functions `toIPv4`,` toIPv6`, now if invalid IP address passes into this functions exception will be raised, before this function return default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringOrNull` `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault `, `toIPv4OrDefault `, `toIPv6OrDefault ` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning default value for invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`, if this setting enabled, then IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)). +* Wait for IDiskRemote thread pool properly. [#35257](https://github.com/ClickHouse/ClickHouse/pull/35257) ([Azat Khuzhin](https://github.com/azat)). +* Fix `CHECK TABLE` query in case when sparse columns are enabled in table. [#35274](https://github.com/ClickHouse/ClickHouse/pull/35274) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible Abort while using Brotli compression with a small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35281](https://github.com/ClickHouse/ClickHouse/pull/35281) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible segfault in JSONEachRow schema inference. [#35291](https://github.com/ClickHouse/ClickHouse/pull/35291) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible `Assertion 'position() != working_buffer.end()' failed` while using lzma compression with small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35295](https://github.com/ClickHouse/ClickHouse/pull/35295) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible segfault while using lz4 compression with a small max_read_buffer_size setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35296](https://github.com/ClickHouse/ClickHouse/pull/35296) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible `Assertion 'position() != working_buffer.end()' failed` while using bzip2 compression with small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35300](https://github.com/ClickHouse/ClickHouse/pull/35300) ([Kruglov Pavel](https://github.com/Avogar)). +* - Fix partial merge join duplicate rows bug, close [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). [#35311](https://github.com/ClickHouse/ClickHouse/pull/35311) ([Vladimir C](https://github.com/vdimir)). +* Fix segfault in Postgres database when getting create table query if database was created using named collections. Closes [#35312](https://github.com/ClickHouse/ClickHouse/issues/35312). [#35313](https://github.com/ClickHouse/ClickHouse/pull/35313) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix bug in S3 zero-copy replication which can lead to errors like `Found parts with the same min block and with the same max block as the missing part` after concurrent fetch/drop table. [#35348](https://github.com/ClickHouse/ClickHouse/pull/35348) ([alesapin](https://github.com/alesapin)). + +#### NO CL ENTRY + +* NO CL ENTRY: '[ImgBot] Optimize images'. [#34590](https://github.com/ClickHouse/ClickHouse/pull/34590) ([imgbot[bot]](https://github.com/apps/imgbot)). +* NO CL ENTRY: 'Revert "Allow restrictive row policies without permissive"'. [#34782](https://github.com/ClickHouse/ClickHouse/pull/34782) ([Vitaly Baranov](https://github.com/vitlibar)). +* NO CL ENTRY: 'Revert "Remove "bugs" that do not exist anymore"'. [#35241](https://github.com/ClickHouse/ClickHouse/pull/35241) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Change timezone in Docker"'. [#35243](https://github.com/ClickHouse/ClickHouse/pull/35243) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Fix 00900_long_parquet_load"'. [#35301](https://github.com/ClickHouse/ClickHouse/pull/35301) ([Vladimir C](https://github.com/vdimir)). + diff --git a/docs/changelogs/v22.3.2.2-lts.md b/docs/changelogs/v22.3.2.2-lts.md new file mode 100644 index 00000000000..fc37facc7af --- /dev/null +++ b/docs/changelogs/v22.3.2.2-lts.md @@ -0,0 +1,6 @@ +### ClickHouse release v22.3.2.2-lts FIXME as compared to v22.3.1.1262-prestable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fix bug in S3 zero-copy replication which can lead to errors like `Found parts with the same min block and with the same max block as the missing part` after concurrent fetch/drop table. [#35348](https://github.com/ClickHouse/ClickHouse/pull/35348) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/changelogs/v22.3.3.44-lts.md b/docs/changelogs/v22.3.3.44-lts.md new file mode 100644 index 00000000000..4214512c533 --- /dev/null +++ b/docs/changelogs/v22.3.3.44-lts.md @@ -0,0 +1,14 @@ +### ClickHouse release v22.3.3.44-lts FIXME as compared to v22.3.2.2-lts + +#### Bug Fix +* Backported in [#35928](https://github.com/ClickHouse/ClickHouse/issues/35928): Added settings `input_format_ipv4_default_on_conversion_error`, `input_format_ipv6_default_on_conversion_error` to allow insert of invalid ip address values as default into tables. Closes [#35726](https://github.com/ClickHouse/ClickHouse/issues/35726). [#35733](https://github.com/ClickHouse/ClickHouse/pull/35733) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#35415](https://github.com/ClickHouse/ClickHouse/issues/35415): Fix possible deadlock in cache. [#35378](https://github.com/ClickHouse/ClickHouse/pull/35378) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#35563](https://github.com/ClickHouse/ClickHouse/issues/35563): Fix cast into IPv4, IPv6 address in IN section. Fixes [#35528](https://github.com/ClickHouse/ClickHouse/issues/35528). [#35534](https://github.com/ClickHouse/ClickHouse/pull/35534) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#35783](https://github.com/ClickHouse/ClickHouse/issues/35783): Fix bug in conversion from custom types to string that could lead to segfault or unexpected error messages. Closes [#35752](https://github.com/ClickHouse/ClickHouse/issues/35752). [#35755](https://github.com/ClickHouse/ClickHouse/pull/35755) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#35881](https://github.com/ClickHouse/ClickHouse/issues/35881): Fixes parsing of the arguments of the functions `extract`. Fixes [#35751](https://github.com/ClickHouse/ClickHouse/issues/35751). [#35799](https://github.com/ClickHouse/ClickHouse/pull/35799) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#35856](https://github.com/ClickHouse/ClickHouse/issues/35856): Respect only quota & period from groups, ignore shares (which are not really limit the number of the cores which can be used). [#35815](https://github.com/ClickHouse/ClickHouse/pull/35815) ([filimonov](https://github.com/filimonov)). +* Backported in [#35938](https://github.com/ClickHouse/ClickHouse/issues/35938): Avoid processing per-column TTL multiple times. [#35820](https://github.com/ClickHouse/ClickHouse/pull/35820) ([Azat Khuzhin](https://github.com/azat)). + diff --git a/docs/changelogs/v22.3.4.20-lts.md b/docs/changelogs/v22.3.4.20-lts.md new file mode 100644 index 00000000000..e746a8e3e0b --- /dev/null +++ b/docs/changelogs/v22.3.4.20-lts.md @@ -0,0 +1,14 @@ +### ClickHouse release v22.3.4.20-lts FIXME as compared to v22.3.3.44-lts + +#### Build/Testing/Packaging Improvement +* - Add `_le_` method for ClickHouseVersion - Fix auto_version for existing tag - docker_server now support getting version from tags - Add python unit tests to backport workflow. [#36028](https://github.com/ClickHouse/ClickHouse/pull/36028) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#36244](https://github.com/ClickHouse/ClickHouse/issues/36244): Fix usage of quota with asynchronous inserts. [#35645](https://github.com/ClickHouse/ClickHouse/pull/35645) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#36240](https://github.com/ClickHouse/ClickHouse/issues/36240): Fix possible loss of subcolumns in type `Object`. [#35682](https://github.com/ClickHouse/ClickHouse/pull/35682) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#36242](https://github.com/ClickHouse/ClickHouse/issues/36242): Fix possible `Can't adjust last granule` exception while reading subcolumns of type `Object`. [#35687](https://github.com/ClickHouse/ClickHouse/pull/35687) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#35938](https://github.com/ClickHouse/ClickHouse/issues/35938): Avoid processing per-column TTL multiple times. [#35820](https://github.com/ClickHouse/ClickHouse/pull/35820) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#36147](https://github.com/ClickHouse/ClickHouse/issues/36147): Fix reading from `Kafka` tables when `kafka_num_consumers > 1` and `kafka_thread_per_consumer = 0`. Returns parallel & multithreaded reading, accidentally broken in 21.11. Closes [#35153](https://github.com/ClickHouse/ClickHouse/issues/35153). [#35973](https://github.com/ClickHouse/ClickHouse/pull/35973) ([filimonov](https://github.com/filimonov)). +* Backported in [#36276](https://github.com/ClickHouse/ClickHouse/issues/36276): Fix reading of empty arrays in reverse order (in queries with descending sorting by prefix of primary key). [#36215](https://github.com/ClickHouse/ClickHouse/pull/36215) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v22.3.5.5-lts.md b/docs/changelogs/v22.3.5.5-lts.md new file mode 100644 index 00000000000..d1c42807f41 --- /dev/null +++ b/docs/changelogs/v22.3.5.5-lts.md @@ -0,0 +1,7 @@ +### ClickHouse release v22.3.5.5-lts FIXME as compared to v22.3.4.20-lts + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#36525](https://github.com/ClickHouse/ClickHouse/issues/36525): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#36674](https://github.com/ClickHouse/ClickHouse/issues/36674): Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v22.3.6.5-lts.md b/docs/changelogs/v22.3.6.5-lts.md new file mode 100644 index 00000000000..70a81bbe9ad --- /dev/null +++ b/docs/changelogs/v22.3.6.5-lts.md @@ -0,0 +1,7 @@ +### ClickHouse release v22.3.6.5-lts FIXME as compared to v22.3.5.5-lts + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#36525](https://github.com/ClickHouse/ClickHouse/issues/36525): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#36795](https://github.com/ClickHouse/ClickHouse/issues/36795): Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v22.4.1.2305-prestable.md b/docs/changelogs/v22.4.1.2305-prestable.md new file mode 100644 index 00000000000..ffd11c7d9c4 --- /dev/null +++ b/docs/changelogs/v22.4.1.2305-prestable.md @@ -0,0 +1,239 @@ +### ClickHouse release v22.4.1.2305-prestable FIXME as compared to v22.3.1.1262-prestable + +#### Backward Incompatible Change +* Function `yandexConsistentHash` (consistent hashing algorithm by Konstantin "kostik" Oblakov) is renamed to `kostikConsistentHash`. The old name is left as an alias for compatibility. Although this change is backward compatible, we may remove the alias in subsequent releases, that's why it's recommended to update the usages of this function in your apps. [#35553](https://github.com/ClickHouse/ClickHouse/pull/35553) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not allow SETTINGS after FORMAT for INSERT queries (there is compatibility setting `parser_settings_after_format_compact` to accept such queries, but it is turned OFF by default). [#35883](https://github.com/ClickHouse/ClickHouse/pull/35883) ([Azat Khuzhin](https://github.com/azat)). +* Changed hashed path for cache files. [#36079](https://github.com/ClickHouse/ClickHouse/pull/36079) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### New Feature +* Added support for transactions for simple `MergeTree` tables. This feature is highly experimental and not recommended for production. Part of [#22086](https://github.com/ClickHouse/ClickHouse/issues/22086). [#24258](https://github.com/ClickHouse/ClickHouse/pull/24258) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Added load balancing setting for [Zoo]Keeper client. Closes [#29617](https://github.com/ClickHouse/ClickHouse/issues/29617). [#30325](https://github.com/ClickHouse/ClickHouse/pull/30325) ([小路](https://github.com/nicelulu)). +* New aggregation function groupSortedArray to obtain an array of first N values. [#34055](https://github.com/ClickHouse/ClickHouse/pull/34055) ([palegre-tiny](https://github.com/palegre-tiny)). +* New functions minSampleSizeContinous and minSampleSizeConversion. [#34354](https://github.com/ClickHouse/ClickHouse/pull/34354) ([achimbab](https://github.com/achimbab)). +* Profiling on Processors level (under `log_processors_profiles` setting, ClickHouse will write time that processor spent during execution/waiting for data to `system.processors_profile_log` table). [#34355](https://github.com/ClickHouse/ClickHouse/pull/34355) ([Azat Khuzhin](https://github.com/azat)). +* Add `toEndOfMonth` function which rounds up a date or date with time to the last day of the month. [#33501](https://github.com/ClickHouse/ClickHouse/issues/33501). [#34394](https://github.com/ClickHouse/ClickHouse/pull/34394) ([Habibullah Oladepo](https://github.com/holadepo)). +* Add `h3PointDistM`, `h3PointDistKm`, `h3PointDistRads`, `h3GetRes0Indexes`, `h3GetPentagonIndexes` functions. [#34568](https://github.com/ClickHouse/ClickHouse/pull/34568) ([Bharat Nallan](https://github.com/bharatnc)). +* Introduce format `ProtobufList`. Fixes [#16436](https://github.com/ClickHouse/ClickHouse/issues/16436). [#35152](https://github.com/ClickHouse/ClickHouse/pull/35152) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* A dedicated small package for `clickhouse-keeper`. [#35308](https://github.com/ClickHouse/ClickHouse/pull/35308) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* added INTERPOLATE extension to the ORDER BY ... WITH FILL closes [#34903](https://github.com/ClickHouse/ClickHouse/issues/34903). [#35349](https://github.com/ClickHouse/ClickHouse/pull/35349) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Added functions `minSampleSizeContinous` and `minSampleSizeConversion`. Author @achimbab. [#35360](https://github.com/ClickHouse/ClickHouse/pull/35360) ([Maksim Kita](https://github.com/kitaisreal)). +* Added functions `arrayFirstOrNull`, `arrayLastOrNull`. Closes [#35238](https://github.com/ClickHouse/ClickHouse/issues/35238). [#35414](https://github.com/ClickHouse/ClickHouse/pull/35414) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow to write remote fs cache on all write operations. Add `system.remote_filesystem_cache` table. Add `drop remote filesystem cache` query. Add introspection for s3 metadata with `system.remote_data_paths` table. Closes [#34021](https://github.com/ClickHouse/ClickHouse/issues/34021). Add cache option for merges by adding mode `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` (turned on by default for merges and can also be turned on by query setting with the same name). Rename cache related settings (`remote_fs_enable_cache -> enable_filesystem_cache`, etc). [#35475](https://github.com/ClickHouse/ClickHouse/pull/35475) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added functions makeDate(year, month, day), makeDate32(year, month, day). [#35628](https://github.com/ClickHouse/ClickHouse/pull/35628) ([Alexander Gololobov](https://github.com/davenger)). +* Added function `flattenTuple`. It receives nested named `Tuple` as an argument and returns a flatten `Tuple` which elements are the paths from the original `Tuple`. E.g.: `Tuple(a Int, Tuple(b Int, c Int)) -> Tuple(a Int, b Int, c Int)`. `flattenTuple` can be used to select all paths from type `Object` as separate columns. [#35690](https://github.com/ClickHouse/ClickHouse/pull/35690) ([Anton Popov](https://github.com/CurtizJ)). +* Support new type of quota `WRITTEN BYTES` to limit amount of written bytes during insert queries. [#35736](https://github.com/ClickHouse/ClickHouse/pull/35736) ([Anton Popov](https://github.com/CurtizJ)). +* Implementation of makeDateTime() and makeDateTIme64(). [#35934](https://github.com/ClickHouse/ClickHouse/pull/35934) ([Alexander Gololobov](https://github.com/davenger)). +* Support '\G;' at the end of query for FORMAT Vertical. Closes [#36111](https://github.com/ClickHouse/ClickHouse/issues/36111). [#36130](https://github.com/ClickHouse/ClickHouse/pull/36130) ([yuuch](https://github.com/yuuch)). +* Adding random salt and appending to password to generate password hash. [#36172](https://github.com/ClickHouse/ClickHouse/pull/36172) ([Rajkumar Varada](https://github.com/varadarajkumar)). +* Add setting throw_if_no_data_to_insert. Closes [#36336](https://github.com/ClickHouse/ClickHouse/issues/36336). [#36345](https://github.com/ClickHouse/ClickHouse/pull/36345) ([flynn](https://github.com/ucasfl)). +* Implement type inference for INSERT INTO function null(). Closes [#36334](https://github.com/ClickHouse/ClickHouse/issues/36334). [#36353](https://github.com/ClickHouse/ClickHouse/pull/36353) ([flynn](https://github.com/ucasfl)). +* ... [#36436](https://github.com/ClickHouse/ClickHouse/pull/36436) ([Rich Raposa](https://github.com/rfraposa)). + +#### Performance Improvement +* Speed up parts loading process of MergeTree to accelerate starting up of clickhouse-server. With this improvement, clickhouse-server was able to decrease starting up time from 75 minutes to 20 seconds, with 700k mergetree parts. [#32928](https://github.com/ClickHouse/ClickHouse/pull/32928) ([李扬](https://github.com/taiyang-li)). +* Sizes of hash tables used during aggregation now collected and used in later queries to avoid hash tables resizes. [#33439](https://github.com/ClickHouse/ClickHouse/pull/33439) ([Nikita Taranov](https://github.com/nickitat)). +* Multiple changes to improve ASOF join performance (1.2 - 1.6x as fast). It also adds support to use big integers. [#34733](https://github.com/ClickHouse/ClickHouse/pull/34733) ([Raúl Marín](https://github.com/Algunenano)). +* URL storage engine now downloads multiple chunks in parallel if the endpoint supports HTTP Range. Two additional settings were added, `max_download_threads` and `max_download_buffer_size`, which control maximum number of threads a single query can use to download the file and the maximum number of bytes each thread can process. [#35150](https://github.com/ClickHouse/ClickHouse/pull/35150) ([Antonio Andelic](https://github.com/antonio2368)). +* parallelization of multipart upload into S3 storage. [#35343](https://github.com/ClickHouse/ClickHouse/pull/35343) ([Sergei Trifonov](https://github.com/serxa)). +* Improve performance of ASOF JOIN if key is native integer. [#35525](https://github.com/ClickHouse/ClickHouse/pull/35525) ([Maksim Kita](https://github.com/kitaisreal)). +* A new query plan optimization. Evaluate functions after `ORDER BY` when possible. As an example, for a query `SELECT sipHash64(number) FROM numbers(1e8) ORDER BY number LIMIT 5`, function `sipHash64` would be evaluated after `ORDER BY` and `LIMIT`, which gives ~20x speed up. [#35623](https://github.com/ClickHouse/ClickHouse/pull/35623) ([Nikita Taranov](https://github.com/nickitat)). +* narrow mutex scope when setenv LIBHDFS3_CONF related issue [#35292](https://github.com/ClickHouse/ClickHouse/issues/35292). [#35646](https://github.com/ClickHouse/ClickHouse/pull/35646) ([shuchaome](https://github.com/shuchaome)). +* Improve performance of `hasAll` function using specializations for SSE and AVX2. Author @youennL-cs. [#35723](https://github.com/ClickHouse/ClickHouse/pull/35723) ([Maksim Kita](https://github.com/kitaisreal)). +* - The explain statement of GLOBAL JOIN two distributed tables can speed up 100x: explain plan select ... from t1_dist global join t2_dist on ... explain pipeline select ... from t1_dist global join t2_dist on ... [#36055](https://github.com/ClickHouse/ClickHouse/pull/36055) ([何李夫](https://github.com/helifu)). +* 2 optimizations: - Optimize trivail count hive query - Speed up hive query by caching metadata of hive file. [#36082](https://github.com/ClickHouse/ClickHouse/pull/36082) ([李扬](https://github.com/taiyang-li)). + +#### Improvement +* ... [#21474](https://github.com/ClickHouse/ClickHouse/pull/21474) ([nvartolomei](https://github.com/nvartolomei)). +* As talked in [issue 27025](https://github.com/ClickHouse/ClickHouse/issues/27025), there is an improvement of the HasAll function using SIMD instruction (SSE and AVX2). Gtest tests have also been added. [#27653](https://github.com/ClickHouse/ClickHouse/pull/27653) ([youennL-cs](https://github.com/youennL-cs)). +* Proper support of setting `max_rows_to_read` in case of reading in order of sorting key and specified limit. Previously the exception `Limit for rows or bytes to read exceeded` could be thrown even if query actually requires to read less amount of rows. [#33230](https://github.com/ClickHouse/ClickHouse/pull/33230) ([Anton Popov](https://github.com/CurtizJ)). +* INTERVAL improvement - can be used with `[MILLI|MICRO|NANO]SECOND`. Added `toStartOf[Milli|Micro|Nano]second()` functions. Added `[add|subtract][Milli|Micro|Nano]second()`. [#34353](https://github.com/ClickHouse/ClickHouse/pull/34353) ([Andrey Zvonov](https://github.com/zvonand)). +* System log tables allow to specify COMMENT in ENGINE declaration. Closes [#33768](https://github.com/ClickHouse/ClickHouse/issues/33768). [#34536](https://github.com/ClickHouse/ClickHouse/pull/34536) ([Maksim Kita](https://github.com/kitaisreal)). +* added sanity checks on server startup (available memory and disk space, max thread count, etc). [#34566](https://github.com/ClickHouse/ClickHouse/pull/34566) ([Sergei Trifonov](https://github.com/serxa)). +* Use minmax index for orc/parquet file in Hive Engine. Related pr: https://github.com/ClickHouse-Extras/arrow/pull/10. [#34631](https://github.com/ClickHouse/ClickHouse/pull/34631) ([李扬](https://github.com/taiyang-li)). +* If `port` is not specified in cluster configuration, default server port will be used. This closes [#34769](https://github.com/ClickHouse/ClickHouse/issues/34769). [#34772](https://github.com/ClickHouse/ClickHouse/pull/34772) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added better error messages in case of connection failed to MySQL. Closes [#35128](https://github.com/ClickHouse/ClickHouse/issues/35128). [#35234](https://github.com/ClickHouse/ClickHouse/pull/35234) ([zzsmdfj](https://github.com/zzsmdfj)). +* Add function `getTypeSerializationStreams`. For a specified type (which is detected from column), it returns an array with all the serialization substream paths. This function is useful mainly for developers. [#35290](https://github.com/ClickHouse/ClickHouse/pull/35290) ([李扬](https://github.com/taiyang-li)). +* - wchc operation is expensive and should not be in the four_letter_word_white_list defaults. [#35320](https://github.com/ClickHouse/ClickHouse/pull/35320) ([zhangyuli1](https://github.com/zhangyuli1)). +* Added an ability to specify cluster secret in replicated database. [#35333](https://github.com/ClickHouse/ClickHouse/pull/35333) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add a new kind of row policies named `simple`. Before this PR we had two kinds or row policies: `permissive` and `restrictive`. A `simple` row policy adds a new filter on a table without any side-effects like it was for permissive and restrictive policies. [#35345](https://github.com/ClickHouse/ClickHouse/pull/35345) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove testmode option, enable it unconditionally. [#35354](https://github.com/ClickHouse/ClickHouse/pull/35354) ([Kseniia Sumarokova](https://github.com/kssenii)). +* For table function `s3cluster` or `HDFSCluster` or `hive`, we can't get right `AccessType` by `StorageFactory::instance().getSourceAccessType(getStorageTypeName())`. This pr fix it. [#35365](https://github.com/ClickHouse/ClickHouse/pull/35365) ([李扬](https://github.com/taiyang-li)). +* For lts releases packages will be pushed to both lts and stable repos. [#35382](https://github.com/ClickHouse/ClickHouse/pull/35382) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Support uuid for postgres engines. Closes [#35384](https://github.com/ClickHouse/ClickHouse/issues/35384). [#35403](https://github.com/ClickHouse/ClickHouse/pull/35403) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add arguments `--user`, `--password`, `--host`, `--port` for clickhouse-diagnostics. [#35422](https://github.com/ClickHouse/ClickHouse/pull/35422) ([李扬](https://github.com/taiyang-li)). +* fix INSERT INTO table FROM INFILE does not display progress bar. [#35429](https://github.com/ClickHouse/ClickHouse/pull/35429) ([xiedeyantu](https://github.com/xiedeyantu)). +* Allow server to bind to low-numbered ports (e.g. 443). ClickHouse installation script will set `cap_net_bind_service` to the binary file. [#35451](https://github.com/ClickHouse/ClickHouse/pull/35451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add settings `input_format_orc_case_insensitive_column_matching`, `input_format_arrow_case_insensitive_column_matching`, and `input_format_parquet_case_insensitive_column_matching` which allows ClickHouse to use case insensitive matching of columns while reading data from ORC, Arrow or Parquet files. [#35459](https://github.com/ClickHouse/ClickHouse/pull/35459) ([Antonio Andelic](https://github.com/antonio2368)). +* - Add explicit table info to the scan node of query plan and pipeline. [#35460](https://github.com/ClickHouse/ClickHouse/pull/35460) ([何李夫](https://github.com/helifu)). +* Propagate query and session settings for distributed DDL queries. Setting `distributed_ddl_entry_format_version` is set to 2 by default now. [#35463](https://github.com/ClickHouse/ClickHouse/pull/35463) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add sizes of subcolumns to `system.parts_columns` table. [#35488](https://github.com/ClickHouse/ClickHouse/pull/35488) ([Anton Popov](https://github.com/CurtizJ)). +* It was possible to get stack overflow in distributed queries if one of the settings `async_socket_for_remote` and `use_hedged_requests` is enabled while parsing very deeply nested data type (at least in debug build). Closes [#35509](https://github.com/ClickHouse/ClickHouse/issues/35509). [#35524](https://github.com/ClickHouse/ClickHouse/pull/35524) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve pasting performance and compatibility of clickhouse-client. This helps [#35501](https://github.com/ClickHouse/ClickHouse/issues/35501). [#35541](https://github.com/ClickHouse/ClickHouse/pull/35541) ([Amos Bird](https://github.com/amosbird)). +* Added a support for automatic schema inference to `s3Cluster` table function. Synced the signatures of `s3 ` and `s3Cluster`. [#35544](https://github.com/ClickHouse/ClickHouse/pull/35544) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Use multiple threads to download objects from S3. Downloading is controllable using `max_download_threads` and `max_download_buffer_size` settings. [#35571](https://github.com/ClickHouse/ClickHouse/pull/35571) ([Antonio Andelic](https://github.com/antonio2368)). +* Deduce absolute hdfs config path. [#35572](https://github.com/ClickHouse/ClickHouse/pull/35572) ([李扬](https://github.com/taiyang-li)). +* - Use some tweaks and heuristics to determine numbers, strings, arrays, tuples and maps in CSV, TSV and TSVRaw data formats. Add setting `input_format_csv_use_best_effort_in_schema_inference` for CSV format that enables/disables using these heuristics, if it's disabled, we treat everything as string. Add similar setting `input_format_tsv_use_best_effort_in_schema_inference` for TSV/TSVRaw format. These settings are enabled by default. - Add Maps support for schema inference in Values format. - Fix possible segfault in schema inference in Values format. - Allow to skip columns with unsupported types in Arrow/ORC/Parquet formats. Add corresponding settings for it: `input_format_{parquet|orc|arrow}_skip_columns_with_unsupported_types_in_schema_inference`. These settings are disabled by default. - Allow to convert a column with type Null to a Nullable column with all NULL values in Arrow/Parquet formats. - Allow to specify column names in schema inference via setting `column_names_for_schema_inference` for formats that don't contain column names (like CSV, TSV, JSONCompactEachRow, etc) - Fix schema inference in ORC/Arrow/Parquet formats in terms of working with Nullable columns. Previously all inferred types were not Nullable and it blocked reading Nullable columns from data, now it's fixed and all inferred types are always Nullable (because we cannot understand that column is Nullable or not by reading the schema). - Fix schema inference in Template format with CSV escaping rules. [#35582](https://github.com/ClickHouse/ClickHouse/pull/35582) ([Kruglov Pavel](https://github.com/Avogar)). +* Add parallel parsing and schema inference for format `JSONAsObject`. [#35592](https://github.com/ClickHouse/ClickHouse/pull/35592) ([Anton Popov](https://github.com/CurtizJ)). +* Added support for schema inference for `hdfsCluster`. [#35602](https://github.com/ClickHouse/ClickHouse/pull/35602) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* - Improve the pipeline description for JOIN. [#35612](https://github.com/ClickHouse/ClickHouse/pull/35612) ([何李夫](https://github.com/helifu)). +* Support schema inference for type `Object` in format `JSONEachRow`. Allow to convert columns of type `Map` to columns of type `Object`. [#35629](https://github.com/ClickHouse/ClickHouse/pull/35629) ([Anton Popov](https://github.com/CurtizJ)). +* Add profile event counter `AsyncInsertBytes` about size of async INSERTs. [#35644](https://github.com/ClickHouse/ClickHouse/pull/35644) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added `is_secure` column to `system.query_log` which denotes if the client is using a secure connection over TCP or HTTP. [#35705](https://github.com/ClickHouse/ClickHouse/pull/35705) ([Antonio Andelic](https://github.com/antonio2368)). +* closes [#35641](https://github.com/ClickHouse/ClickHouse/issues/35641) Allow EPHEMERAL without explicit default expression. [#35706](https://github.com/ClickHouse/ClickHouse/pull/35706) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix send_logs_level for clickhouse local. Closes [#35653](https://github.com/ClickHouse/ClickHouse/issues/35653). [#35716](https://github.com/ClickHouse/ClickHouse/pull/35716) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Improve columns ordering in schema inference for formats TSKV and JSONEachRow, closes [#35640](https://github.com/ClickHouse/ClickHouse/issues/35640). Don't stop schema inference when reading empty row in schema inference for formats TSKV and JSONEachRow. [#35724](https://github.com/ClickHouse/ClickHouse/pull/35724) ([Kruglov Pavel](https://github.com/Avogar)). +* Add new setting `input_format_json_read_bools_as_numbers` that allows to infer and parse bools as numbers in JSON input formats. It's enabled by default. Suggested by @alexey-milovidov. [#35735](https://github.com/ClickHouse/ClickHouse/pull/35735) ([Kruglov Pavel](https://github.com/Avogar)). +* Respect remote_url_allow_hosts for hive. [#35743](https://github.com/ClickHouse/ClickHouse/pull/35743) ([李扬](https://github.com/taiyang-li)). +* Support schema inference for insert select with using `input` table function. Get schema from insertion table instead of inferring it from the data in case of insert select from table functions that support schema inference. Closes [#35639](https://github.com/ClickHouse/ClickHouse/issues/35639). [#35760](https://github.com/ClickHouse/ClickHouse/pull/35760) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve projection analysis to optimize trivial queries such as `count()`. [#35788](https://github.com/ClickHouse/ClickHouse/pull/35788) ([Amos Bird](https://github.com/amosbird)). +* support ALTER TABLE t DETACH PARTITION (ALL). [#35794](https://github.com/ClickHouse/ClickHouse/pull/35794) ([awakeljw](https://github.com/awakeljw)). +* Added an animation to the hourglass icon to indicate to the user that a query is running. [#35860](https://github.com/ClickHouse/ClickHouse/pull/35860) ([peledni](https://github.com/peledni)). +* Now some `ALTER MODIFY COLUMN` queries for `Arrays` and `Nullable` types can be done at metadata level without mutations. For example, alter from `Array(Enum8('Option1'=1))` to `Array(Enum8('Option1'=1, 'Option2'=2))`. [#35882](https://github.com/ClickHouse/ClickHouse/pull/35882) ([alesapin](https://github.com/alesapin)). +* Now it's not allowed to `ALTER TABLE ... RESET SETTING` for non-existing settings for MergeTree engines family. Fixes [#35816](https://github.com/ClickHouse/ClickHouse/issues/35816). [#35884](https://github.com/ClickHouse/ClickHouse/pull/35884) ([alesapin](https://github.com/alesapin)). +* Improve settings configuration for s3 storage / table function. [#35915](https://github.com/ClickHouse/ClickHouse/pull/35915) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add some basic metrics to monitor engine=Kafka tables. [#35916](https://github.com/ClickHouse/ClickHouse/pull/35916) ([filimonov](https://github.com/filimonov)). +* Now `kafka_num_consumers` can be bigger than amount of physical cores in case of low resource machine (less than 16 cores). [#35926](https://github.com/ClickHouse/ClickHouse/pull/35926) ([alesapin](https://github.com/alesapin)). +* Update unixodbc to mitigate CVE-2018-7485. [#35943](https://github.com/ClickHouse/ClickHouse/pull/35943) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Require mutations for per-table TTL only when it had been changed. [#35953](https://github.com/ClickHouse/ClickHouse/pull/35953) ([Azat Khuzhin](https://github.com/azat)). +* - Add `dns_max_consecutive_failures` setting to stop re-resolving cached DNS entries after a number of consecutive failures (5 by default). [#35956](https://github.com/ClickHouse/ClickHouse/pull/35956) ([Raúl Marín](https://github.com/Algunenano)). +* ASTPartition::formatImpl should output ALL while executing ALTER TABLE t DETACH PARTITION ALL. [#35987](https://github.com/ClickHouse/ClickHouse/pull/35987) ([awakeljw](https://github.com/awakeljw)). +* `clickhouse-keeper` starts answering 4-letter commands before getting the quorum. [#35992](https://github.com/ClickHouse/ClickHouse/pull/35992) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix wrong assertion in replxx which happens when navigating back the history when the first line of input is a newline. Mark as improvement because it only affects debug build. This fixes [#34511](https://github.com/ClickHouse/ClickHouse/issues/34511). [#36007](https://github.com/ClickHouse/ClickHouse/pull/36007) ([Amos Bird](https://github.com/amosbird)). +* If someone writes DEFAULT NULL in table definition, make data type Nullable. [#35887](https://github.com/ClickHouse/ClickHouse/issues/35887). [#36058](https://github.com/ClickHouse/ClickHouse/pull/36058) ([xiedeyantu](https://github.com/xiedeyantu)). +* Added `thread_id` and `query_id` columns to `system.zookeeper_log` table. [#36074](https://github.com/ClickHouse/ClickHouse/pull/36074) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Auto assign numbers for Enum elements. [#36101](https://github.com/ClickHouse/ClickHouse/pull/36101) ([awakeljw](https://github.com/awakeljw)). +* Reset thread name in `ThreadPool` to `ThreadPoolIdle` after job is done. This is to avoid displaying the old thread name for idle threads. This closes [#36114](https://github.com/ClickHouse/ClickHouse/issues/36114). [#36115](https://github.com/ClickHouse/ClickHouse/pull/36115) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support `UNSIGNED` modifier with unused parameters of `INT`. [#36126](https://github.com/ClickHouse/ClickHouse/pull/36126) ([awakeljw](https://github.com/awakeljw)). +* Add support for atomic exchange in OSX. [#36133](https://github.com/ClickHouse/ClickHouse/pull/36133) ([Raúl Marín](https://github.com/Algunenano)). +* Update the progress bar after receiving every ProfileEvents packet. This change must fix the showing of outdated profiling data in client. [#36202](https://github.com/ClickHouse/ClickHouse/pull/36202) ([Dmitry Novik](https://github.com/novikd)). +* Check ORC/Parquet/Arrow format magic bytes before loading file in memory to prevent high memory usage in case of wrong file format. [#36209](https://github.com/ClickHouse/ClickHouse/pull/36209) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow queries `insert into function file(...) select from` for files with formats that don't support schema inference. For example: `insert into function file(data.json) select 42` - such query didn't work previously. [#36211](https://github.com/ClickHouse/ClickHouse/pull/36211) ([Kruglov Pavel](https://github.com/Avogar)). +* Send both stdin data and data from query/data from infile in client. Previously client ignored stdin data in case of both sources were present. Closes [#36100](https://github.com/ClickHouse/ClickHouse/issues/36100). [#36254](https://github.com/ClickHouse/ClickHouse/pull/36254) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow missing columns for mongo storage. Closes [#36119](https://github.com/ClickHouse/ClickHouse/issues/36119). Closes [#26490](https://github.com/ClickHouse/ClickHouse/issues/26490). [#36272](https://github.com/ClickHouse/ClickHouse/pull/36272) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Input format parsers can synchronize after wrong value of `Bool` or `Map` data types (see the `input_format_allow_errors_*` settings). [#36333](https://github.com/ClickHouse/ClickHouse/pull/36333) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check for harmful environment variables like `LD_PRELOAD` at startup. It makes sense in [Google Collab](https://colab.research.google.com/drive/1wzYn59PA9EDyra6356a8rUpwd3zUX0Zt?usp=sharing). This closes [#36340](https://github.com/ClickHouse/ClickHouse/issues/36340). [#36342](https://github.com/ClickHouse/ClickHouse/pull/36342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix [#36307](https://github.com/ClickHouse/ClickHouse/issues/36307) [#35891](https://github.com/ClickHouse/ClickHouse/issues/35891) possible range issues in automatic assigned enums, also fix error message. [#36352](https://github.com/ClickHouse/ClickHouse/pull/36352) ([awakeljw](https://github.com/awakeljw)). +* hex function support for Int128/Int256/UInt128/UInt256. [#36386](https://github.com/ClickHouse/ClickHouse/pull/36386) ([Memo](https://github.com/Joeywzr)). + +#### Bug Fix +* Add type checking when create materialized view. Try to close: [#23684](https://github.com/ClickHouse/ClickHouse/issues/23684). [#24896](https://github.com/ClickHouse/ClickHouse/pull/24896) ([hexiaoting](https://github.com/hexiaoting)). +* Avoid erasing columns from a block if it doesn't exist while reading data from Hive. [#35393](https://github.com/ClickHouse/ClickHouse/pull/35393) ([lgbo](https://github.com/lgbo-ustc)). +* Added settings `input_format_ipv4_default_on_conversion_error`, `input_format_ipv6_default_on_conversion_error` to allow insert of invalid ip address values as default into tables. Closes [#35726](https://github.com/ClickHouse/ClickHouse/issues/35726). [#35733](https://github.com/ClickHouse/ClickHouse/pull/35733) ([Maksim Kita](https://github.com/kitaisreal)). +* In FileSegmentsHolder::~FileSegmentsHolder(), when a segment is set to detach, it will assert its state is empty. However, in FileSegment::completeImpl(), when detach is set to true, its state may be PARTIALLY_DOWNLOADED_NO_CONTINUATION or SKIP_CACHE or PARTIALLY_DOWNLOADED, thus cause error in FileSegmentsHolder::~FileSegmentsHolder(). ``` if (file_segment->detached) { /// This file segment is not owned by cache, so it will be destructed /// at this point, therefore no completion required. assert(file_segment->state() == FileSegment::State::EMPTY); file_segment_it = file_segments.erase(current_file_segment_it); continue; } ```. [#36452](https://github.com/ClickHouse/ClickHouse/pull/36452) ([Han Shukai](https://github.com/KinderRiven)). + +#### Build/Testing/Packaging Improvement +* Add backward compatibility check in stress test. Closes [#25088](https://github.com/ClickHouse/ClickHouse/issues/25088). [#27928](https://github.com/ClickHouse/ClickHouse/pull/27928) ([Kruglov Pavel](https://github.com/Avogar)). +* - Migrate package building to nfpm - Deprecate `release` script in favor of `packages/build` - Build everything in clickhouse/binary-builder image (cleanup: clickhouse/deb-builder) - Add symbol stripping to cmake (todo: use $prefix/lib/$bin_dir/clickhouse/$binary.debug) - Fix issue with DWARF symbols - Add Alpine APK packages - Rename `alien` to `additional_pkgs`. [#33664](https://github.com/ClickHouse/ClickHouse/pull/33664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a night scan and upload for coverity. [#34895](https://github.com/ClickHouse/ClickHouse/pull/34895) ([Boris Kuschel](https://github.com/bkuschel)). +* - Switch to libcxx / libcxxabi from LLVM 14. [#34906](https://github.com/ClickHouse/ClickHouse/pull/34906) ([Raúl Marín](https://github.com/Algunenano)). +* Add next batch of random settings in functional tests. [#35047](https://github.com/ClickHouse/ClickHouse/pull/35047) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix stress-test report in CI, now we upload the runlog with information about started stress tests only once. [#35093](https://github.com/ClickHouse/ClickHouse/pull/35093) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - [x] redo alpine image to use clean Dockerfile - [x] Create a script in tests/ci to build both ubuntu and alpine images - [x] Add clickhouse-keeper image (cc @nikitamikhaylov) - [x] Add build check to PullRequestCI - [x] Add a job to a ReleaseCI - [x] Add a job to MasterCI to build and push `clickhouse/clickhouse-server:head` and `clickhouse/clickhouse-keeper:head` images for each merged PR. [#35211](https://github.com/ClickHouse/ClickHouse/pull/35211) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Reverse `--no-prestable` key to match the logic. [#35372](https://github.com/ClickHouse/ClickHouse/pull/35372) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* All disabled tests run longer than 30 seconds. [#35413](https://github.com/ClickHouse/ClickHouse/pull/35413) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix copypaste error for clickhouse-keeper test. [#35428](https://github.com/ClickHouse/ClickHouse/pull/35428) ([zhangyuli1](https://github.com/zhangyuli1)). +* Fix failed tests in: https://s3.amazonaws.com/clickhouse-test-reports/35422/32348779fd0bac5276727cfc01e75c625ecc69b9/fuzzer_astfuzzerubsan,actions//report.html. [#35439](https://github.com/ClickHouse/ClickHouse/pull/35439) ([李扬](https://github.com/taiyang-li)). +* Apply black formatter to python code and add a per-commit check. [#35466](https://github.com/ClickHouse/ClickHouse/pull/35466) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a label to recognize a building task for every image. [#35583](https://github.com/ClickHouse/ClickHouse/pull/35583) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Clean-up after functional test 02167 ... [#35681](https://github.com/ClickHouse/ClickHouse/pull/35681) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Minor improvement in contrib/krb5 build configuration. [#35832](https://github.com/ClickHouse/ClickHouse/pull/35832) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Update URL in test visualizer from `play-ci` to `play` (it was moved). [#35872](https://github.com/ClickHouse/ClickHouse/pull/35872) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Highlight headers in the PR template. Improve description checking logging. [#35947](https://github.com/ClickHouse/ClickHouse/pull/35947) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Running with podman is failing: it complains about specifying the same volume twice. [#35978](https://github.com/ClickHouse/ClickHouse/pull/35978) ([Roman Nikonov](https://github.com/nic11)). +* Add argument for total number of desired builds. [#35999](https://github.com/ClickHouse/ClickHouse/pull/35999) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Add `_le_` method for ClickHouseVersion - Fix auto_version for an existing tag - docker_server now supports getting the version from tags - Add python unit tests to backport workflow - Move version_arg to version_helper, add tests. [#36029](https://github.com/ClickHouse/ClickHouse/pull/36029) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Significant improvement in docker build-cache system. [#36041](https://github.com/ClickHouse/ClickHouse/pull/36041) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* After CVE-2022-24765 git needs additional config parameter when directory is owned by another user. [#36193](https://github.com/ClickHouse/ClickHouse/pull/36193) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add an option for build profiling (`-ftime-trace`). [#36318](https://github.com/ClickHouse/ClickHouse/pull/36318) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Delete old packaging infrastructure. [#36330](https://github.com/ClickHouse/ClickHouse/pull/36330) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fallback to a default event in case of broken API. [#36412](https://github.com/ClickHouse/ClickHouse/pull/36412) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Check a number of required reports in BuilderSpecialReport. [#36413](https://github.com/ClickHouse/ClickHouse/pull/36413) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a labeling for `Revert` PRs. [#36422](https://github.com/ClickHouse/ClickHouse/pull/36422) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Disallow ALTER TTL for engines that does not support it, to avoid breaking ATTACH TABLE (closes [#33344](https://github.com/ClickHouse/ClickHouse/issues/33344)). [#33391](https://github.com/ClickHouse/ClickHouse/pull/33391) ([zhongyuankai](https://github.com/zhongyuankai)). +* Do not delay final part writing by default (fixes possible `Memory limit exceeded` during `INSERT` by adding `max_insert_delayed_streams_for_parallel_write` with default to 1000 for writes to s3 and disabled as before otherwise). [#34780](https://github.com/ClickHouse/ClickHouse/pull/34780) ([Azat Khuzhin](https://github.com/azat)). +* fix issue:input_format_null_as_default does not work for DEFAULT expressions Closes [#34890](https://github.com/ClickHouse/ClickHouse/issues/34890). [#35039](https://github.com/ClickHouse/ClickHouse/pull/35039) ([zzsmdfj](https://github.com/zzsmdfj)). +* Fix mutations in tables with enabled sparse columns. [#35284](https://github.com/ClickHouse/ClickHouse/pull/35284) ([Anton Popov](https://github.com/CurtizJ)). +* Fix schema inference for TSKV format while using small max_read_buffer_size. [#35332](https://github.com/ClickHouse/ClickHouse/pull/35332) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix partition pruning in case of comparison with constant in `WHERE`. If column and constant had different types, overflow was possible. Query could return an incorrect empty result. This fixes [#35304](https://github.com/ClickHouse/ClickHouse/issues/35304). [#35334](https://github.com/ClickHouse/ClickHouse/pull/35334) ([Amos Bird](https://github.com/amosbird)). +* Fix bug in S3 zero-copy replication which can lead to errors like `Found parts with the same min block and with the same max block as the missing part` after concurrent fetch/drop table. [#35348](https://github.com/ClickHouse/ClickHouse/pull/35348) ([alesapin](https://github.com/alesapin)). +* Fix issue with non-existing directory https://github.com/ClickHouse/ClickHouse/runs/5588046879?check_suite_focus=true. [#35376](https://github.com/ClickHouse/ClickHouse/pull/35376) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix possible deadlock in cache. [#35378](https://github.com/ClickHouse/ClickHouse/pull/35378) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix wrong assets path in release workflow. [#35379](https://github.com/ClickHouse/ClickHouse/pull/35379) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Cache fixes for high concurrency on corner cases. [#35381](https://github.com/ClickHouse/ClickHouse/pull/35381) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix working with columns that are not needed in query in Arrow/Parquet/ORC formats, it prevents possible errors like `Unsupported type of an input column ` when file contains column with unsupported type and we don't use it in query. [#35406](https://github.com/ClickHouse/ClickHouse/pull/35406) ([Kruglov Pavel](https://github.com/Avogar)). +* Skip empty chunks in GroupingAggregatedTransform. [#35417](https://github.com/ClickHouse/ClickHouse/pull/35417) ([Nikita Taranov](https://github.com/nickitat)). +* Now merges executed with zero copy replication will not spam logs with message `Found parts with the same min block and with the same max block as the missing part _ on replica _. Hoping that it will eventually appear as a result of a merge.`. [#35430](https://github.com/ClickHouse/ClickHouse/pull/35430) ([alesapin](https://github.com/alesapin)). +* Fix excessive logging when using S3 as backend for MergeTree or as separate table engine/function. Fixes [#30559](https://github.com/ClickHouse/ClickHouse/issues/30559). [#35434](https://github.com/ClickHouse/ClickHouse/pull/35434) ([alesapin](https://github.com/alesapin)). +* Fix wrong result of datetime64 when negative. Close [#34831](https://github.com/ClickHouse/ClickHouse/issues/34831). [#35440](https://github.com/ClickHouse/ClickHouse/pull/35440) ([李扬](https://github.com/taiyang-li)). +* Fix bug in function `if` when resulting column type differs with resulting data type that led to logical errors like `Logical error: 'Bad cast from type DB::ColumnVector to DB::ColumnVector'.`. Closes [#35367](https://github.com/ClickHouse/ClickHouse/issues/35367). [#35476](https://github.com/ClickHouse/ClickHouse/pull/35476) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bug in Keeper which can lead to unstable client connections. Introduced in [#35031](https://github.com/ClickHouse/ClickHouse/issues/35031). [#35498](https://github.com/ClickHouse/ClickHouse/pull/35498) ([alesapin](https://github.com/alesapin)). +* Fix crash for function `throwIf` with constant arguments. [#35500](https://github.com/ClickHouse/ClickHouse/pull/35500) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix crash during short circuit function evaluation when one of arguments is nullable constant. Closes [#35497](https://github.com/ClickHouse/ClickHouse/issues/35497). Closes [#35496](https://github.com/ClickHouse/ClickHouse/issues/35496). [#35502](https://github.com/ClickHouse/ClickHouse/pull/35502) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix cast into IPv4, IPv6 address in IN section. Fixes [#35528](https://github.com/ClickHouse/ClickHouse/issues/35528). [#35534](https://github.com/ClickHouse/ClickHouse/pull/35534) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix parsing of IPv6 addresses longer than 39 characters. Closes [#34022](https://github.com/ClickHouse/ClickHouse/issues/34022). [#35539](https://github.com/ClickHouse/ClickHouse/pull/35539) ([Maksim Kita](https://github.com/kitaisreal)). +* Fixed return type deduction for `caseWithExpression`. The type of the ELSE branch is now correctly taken into account. [#35576](https://github.com/ClickHouse/ClickHouse/pull/35576) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix s3 engine getting virtual columns. Closes [#35411](https://github.com/ClickHouse/ClickHouse/issues/35411). [#35586](https://github.com/ClickHouse/ClickHouse/pull/35586) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix version string setting in version_helper.py. [#35589](https://github.com/ClickHouse/ClickHouse/pull/35589) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix headers with named collections, add compression_method. Closes [#35273](https://github.com/ClickHouse/ClickHouse/issues/35273). Closes [#35269](https://github.com/ClickHouse/ClickHouse/issues/35269). [#35593](https://github.com/ClickHouse/ClickHouse/pull/35593) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Setting `database_atomic_wait_for_drop_and_detach_synchronously` worked incorrectly for `ATTACH TABLE` query when previously detached table is still in use, It's fixed. [#35594](https://github.com/ClickHouse/ClickHouse/pull/35594) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix possible segfault in materialised postgresql which happened if exception occurred when data, collected in memory, was synced into underlying tables. Closes [#35611](https://github.com/ClickHouse/ClickHouse/issues/35611). [#35614](https://github.com/ClickHouse/ClickHouse/pull/35614) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix `HashJoin` when columns with `LowCardinality` type are used. This closes [#35548](https://github.com/ClickHouse/ClickHouse/issues/35548). [#35616](https://github.com/ClickHouse/ClickHouse/pull/35616) ([Antonio Andelic](https://github.com/antonio2368)). +* Check remote_url_allow_hosts before schema inference in URL engine Closes [#35064](https://github.com/ClickHouse/ClickHouse/issues/35064). [#35619](https://github.com/ClickHouse/ClickHouse/pull/35619) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix positional arguments with aliases. Closes [#35600](https://github.com/ClickHouse/ClickHouse/issues/35600). [#35620](https://github.com/ClickHouse/ClickHouse/pull/35620) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix projection analysis which might lead to wrong query result when IN subquery is used. This fixes [#35336](https://github.com/ClickHouse/ClickHouse/issues/35336). [#35631](https://github.com/ClickHouse/ClickHouse/pull/35631) ([Amos Bird](https://github.com/amosbird)). +* Fix usage of quota with asynchronous inserts. [#35645](https://github.com/ClickHouse/ClickHouse/pull/35645) ([Anton Popov](https://github.com/CurtizJ)). +* Fix server crash when large number of arguments are passed into `format` function. Please refer to the test file and see how to reproduce the crash. [#35651](https://github.com/ClickHouse/ClickHouse/pull/35651) ([Amos Bird](https://github.com/amosbird)). +* Fix part checking logic for parts with projections. Error happened when projection and main part had different types. This is similar to https://github.com/ClickHouse/ClickHouse/pull/33774 . The bug is addressed by @caoyang10. [#35667](https://github.com/ClickHouse/ClickHouse/pull/35667) ([Amos Bird](https://github.com/amosbird)). +* Fix check asof join key nullability, close [#35565](https://github.com/ClickHouse/ClickHouse/issues/35565). [#35674](https://github.com/ClickHouse/ClickHouse/pull/35674) ([Vladimir C](https://github.com/vdimir)). +* Fix possible loss of subcolumns in type `Object`. [#35682](https://github.com/ClickHouse/ClickHouse/pull/35682) ([Anton Popov](https://github.com/CurtizJ)). +* Enable build with JIT compilation by default. [#35683](https://github.com/ClickHouse/ClickHouse/pull/35683) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix possible `Can't adjust last granule` exception while reading subcolumns of type `Object`. [#35687](https://github.com/ClickHouse/ClickHouse/pull/35687) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bug in creating materialized view with subquery after server restart. Materialized view was not getting updated after inserts into underlying table after server restart. Closes [#35511](https://github.com/ClickHouse/ClickHouse/issues/35511). [#35691](https://github.com/ClickHouse/ClickHouse/pull/35691) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix dropping non-empty database in clickhouse local. Closes [#35692](https://github.com/ClickHouse/ClickHouse/issues/35692). [#35711](https://github.com/ClickHouse/ClickHouse/pull/35711) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix any/all(subquery) implementation. Closes [#35489](https://github.com/ClickHouse/ClickHouse/issues/35489). [#35727](https://github.com/ClickHouse/ClickHouse/pull/35727) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix bug in conversion from custom types to string that could lead to segfault or unexpected error messages. Closes [#35752](https://github.com/ClickHouse/ClickHouse/issues/35752). [#35755](https://github.com/ClickHouse/ClickHouse/pull/35755) ([Kruglov Pavel](https://github.com/Avogar)). +* Now metadata for broken parts will be removed from metadata cache (introduced in [#32928](https://github.com/ClickHouse/ClickHouse/issues/32928)) on server start. [#35759](https://github.com/ClickHouse/ClickHouse/pull/35759) ([chen9t](https://github.com/chen9t)). +* fix filebuffer pos in RemoteReadBuffer When RemoteReadBuffer is consumed, its pos will increase, for example in HadoopSnappyReadBuffer::nextImpl. ![image](https://user-images.githubusercontent.com/80669699/160880640-8535a701-63bd-42e9-a2c2-e5f215bcd96b.png). [#35771](https://github.com/ClickHouse/ClickHouse/pull/35771) ([shuchaome](https://github.com/shuchaome)). +* Fixes parsing of the arguments of the functions `extract`. Fixes [#35751](https://github.com/ClickHouse/ClickHouse/issues/35751). [#35799](https://github.com/ClickHouse/ClickHouse/pull/35799) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix bug in indexes of not presented columns in -WithNames formats that led to error `INCORRECT_NUMBER_OF_COLUMNS ` when the number of columns is more than 256. Closes [#35793](https://github.com/ClickHouse/ClickHouse/issues/35793). [#35803](https://github.com/ClickHouse/ClickHouse/pull/35803) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix inserts to columns of type `Object` in case when there is data related to several partitions in insert query. [#35806](https://github.com/ClickHouse/ClickHouse/pull/35806) ([Anton Popov](https://github.com/CurtizJ)). +* Respect only quota & period from groups, ignore shares (which are not really limit the number of the cores which can be used). [#35815](https://github.com/ClickHouse/ClickHouse/pull/35815) ([filimonov](https://github.com/filimonov)). +* Avoid processing per-column TTL multiple times. [#35820](https://github.com/ClickHouse/ClickHouse/pull/35820) ([Azat Khuzhin](https://github.com/azat)). +* fix issue: [#34966](https://github.com/ClickHouse/ClickHouse/issues/34966). [#35840](https://github.com/ClickHouse/ClickHouse/pull/35840) ([zzsmdfj](https://github.com/zzsmdfj)). +* Disable `session_log` because memory safety issue has been found by fuzzing. See [#35714](https://github.com/ClickHouse/ClickHouse/issues/35714). [#35873](https://github.com/ClickHouse/ClickHouse/pull/35873) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix formatting of INSERT INFILE queries (missing quotes). [#35886](https://github.com/ClickHouse/ClickHouse/pull/35886) ([Azat Khuzhin](https://github.com/azat)). +* Fixed GA not reporting events. [#35935](https://github.com/ClickHouse/ClickHouse/pull/35935) ([peledni](https://github.com/peledni)). +* Fix reading from `Kafka` tables when `kafka_num_consumers > 1` and `kafka_thread_per_consumer = 0`. Returns parallel & multithreaded reading, accidentally broken in 21.11. Closes [#35153](https://github.com/ClickHouse/ClickHouse/issues/35153). [#35973](https://github.com/ClickHouse/ClickHouse/pull/35973) ([filimonov](https://github.com/filimonov)). +* Fix performance regression of scalar query optimization. [#35986](https://github.com/ClickHouse/ClickHouse/pull/35986) ([Amos Bird](https://github.com/amosbird)). +* Fix error while moving table with `JOIN` engine from `Ordinary` database to `Atomic`, close [#35686](https://github.com/ClickHouse/ClickHouse/issues/35686). [#35995](https://github.com/ClickHouse/ClickHouse/pull/35995) ([Vladimir C](https://github.com/vdimir)). +* Fix error `Empty list of columns in SELECT query` in CROSS JOIN close [#35672](https://github.com/ClickHouse/ClickHouse/issues/35672). [#36033](https://github.com/ClickHouse/ClickHouse/pull/36033) ([Vladimir C](https://github.com/vdimir)). +* Fix possible incorrect result of `WINDOW` functions in queries with `LIMIT` which was caused by wrong limit-push-down query plan optimization. Fixes [#36071](https://github.com/ClickHouse/ClickHouse/issues/36071) and [#23125](https://github.com/ClickHouse/ClickHouse/issues/23125). [#36075](https://github.com/ClickHouse/ClickHouse/pull/36075) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Throw an exception when CH cannot execute a file instead of displaying success and silently failing. [#36088](https://github.com/ClickHouse/ClickHouse/pull/36088) ([Julian Gilyadov](https://github.com/israelg99)). +* Fix window view when is proc time and window kind larger than day, see code comment. [#36109](https://github.com/ClickHouse/ClickHouse/pull/36109) ([flynn](https://github.com/ucasfl)). +* Fix bug of read buffer from hdfs. ReadBufferFromHDFSImpl::offset was misused as offset of working_buffer, but it is file offset. cc @kssenii. [#36153](https://github.com/ClickHouse/ClickHouse/pull/36153) ([李扬](https://github.com/taiyang-li)). +* Fix crash in ParallelReadBuffer. [#36169](https://github.com/ClickHouse/ClickHouse/pull/36169) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow to convert empty strings to empty values of type `Objects`. [#36179](https://github.com/ClickHouse/ClickHouse/pull/36179) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible segfault in schema inference for JSON formats. [#36195](https://github.com/ClickHouse/ClickHouse/pull/36195) ([Kruglov Pavel](https://github.com/Avogar)). +* `CREATE TABLE ... AS` might fail with `Replica ... already exists` even if `ReplicatedMergeTree` table was created with default arguments. It's fixed. Now `{uuid}` macro is not unfolded when saving table metadata. Therefore, it's not allowed to move `ReplicatedMergeTree` table from `Atomic` to `Ordinary` database if `zookeeper_path` contains `{uuid}` macro (or table was created with default engine arguments). Fixes [#35577](https://github.com/ClickHouse/ClickHouse/issues/35577). [#36200](https://github.com/ClickHouse/ClickHouse/pull/36200) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix reading of empty arrays in reverse order (in queries with descending sorting by prefix of primary key). [#36215](https://github.com/ClickHouse/ClickHouse/pull/36215) ([Anton Popov](https://github.com/CurtizJ)). +* Play UI was not able to display some resultsets, for example `SELECT * FROM dish`. [#36283](https://github.com/ClickHouse/ClickHouse/pull/36283) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in ReadBufferFromHDFS in debug mode. image. [#36287](https://github.com/ClickHouse/ClickHouse/pull/36287) ([zhanglistar](https://github.com/zhanglistar)). +* Fix "Cannot find column" error for distributed queries with LIMIT BY. [#36454](https://github.com/ClickHouse/ClickHouse/pull/36454) ([Azat Khuzhin](https://github.com/azat)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "[WIP] New row policies"'. [#35454](https://github.com/ClickHouse/ClickHouse/pull/35454) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'remove ATOMIC_FLAG_INIT: depreciated in C++20 and warns in clang-14'. [#35785](https://github.com/ClickHouse/ClickHouse/pull/35785) ([Brendan Cox](https://github.com/justnoise)). +* NO CL ENTRY: 'Revert "Added support for schema inference for `hdfsCluster`"'. [#35802](https://github.com/ClickHouse/ClickHouse/pull/35802) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* NO CL ENTRY: '[Snyk] Security upgrade mkdocs from 1.1.2 to 1.3.0'. [#35864](https://github.com/ClickHouse/ClickHouse/pull/35864) ([Snyk bot](https://github.com/snyk-bot)). +* NO CL ENTRY: 'Revert "Format changes for new docs"'. [#35894](https://github.com/ClickHouse/ClickHouse/pull/35894) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "clang-tidy report issues with Medium priority"'. [#35941](https://github.com/ClickHouse/ClickHouse/pull/35941) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Fix crash in ParallelReadBuffer"'. [#36210](https://github.com/ClickHouse/ClickHouse/pull/36210) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### Bug Fix (prestable release) + +* call RemoteQueryExecutor with original_query instead of an rewritten query, elimate the AMBIGUOUS_COLUMN_NAME exception. [#35748](https://github.com/ClickHouse/ClickHouse/pull/35748) ([lgbo](https://github.com/lgbo-ustc)). + diff --git a/docs/changelogs/v22.4.2.1-stable.md b/docs/changelogs/v22.4.2.1-stable.md new file mode 100644 index 00000000000..c40bf8d92c9 --- /dev/null +++ b/docs/changelogs/v22.4.2.1-stable.md @@ -0,0 +1,6 @@ +### ClickHouse release v22.4.2.1-stable FIXME as compared to v22.4.1.2305-prestable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fix projection analysis which might lead to wrong query result when IN subquery is used. This fixes [#35336](https://github.com/ClickHouse/ClickHouse/issues/35336). [#35631](https://github.com/ClickHouse/ClickHouse/pull/35631) ([Amos Bird](https://github.com/amosbird)). + diff --git a/docs/changelogs/v22.4.3.3-stable.md b/docs/changelogs/v22.4.3.3-stable.md new file mode 100644 index 00000000000..5ab7872f880 --- /dev/null +++ b/docs/changelogs/v22.4.3.3-stable.md @@ -0,0 +1,6 @@ +### ClickHouse release v22.4.3.3-stable FIXME as compared to v22.4.2.1-stable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#36582](https://github.com/ClickHouse/ClickHouse/issues/36582): Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416) . This is for https://github.com/ClickHouse/ClickHouse/pull/36417. [#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)). + diff --git a/docs/changelogs/v22.4.4.7-stable.md b/docs/changelogs/v22.4.4.7-stable.md new file mode 100644 index 00000000000..794082328df --- /dev/null +++ b/docs/changelogs/v22.4.4.7-stable.md @@ -0,0 +1,8 @@ +### ClickHouse release v22.4.4.7-stable FIXME as compared to v22.4.3.3-stable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#36524](https://github.com/ClickHouse/ClickHouse/issues/36524): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#36582](https://github.com/ClickHouse/ClickHouse/issues/36582): Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416) . This is for https://github.com/ClickHouse/ClickHouse/pull/36417. [#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#36673](https://github.com/ClickHouse/ClickHouse/issues/36673): Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v22.4.5.9-stable.md b/docs/changelogs/v22.4.5.9-stable.md new file mode 100644 index 00000000000..63dfd117816 --- /dev/null +++ b/docs/changelogs/v22.4.5.9-stable.md @@ -0,0 +1,9 @@ +### ClickHouse release v22.4.5.9-stable FIXME as compared to v22.4.4.7-stable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#36524](https://github.com/ClickHouse/ClickHouse/issues/36524): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#36635](https://github.com/ClickHouse/ClickHouse/issues/36635): Fix `Missing column` exception which could happen while using `INTERPOLATE` with `ENGINE = MergeTree` table. [#36549](https://github.com/ClickHouse/ClickHouse/pull/36549) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#36794](https://github.com/ClickHouse/ClickHouse/issues/36794): Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#36926](https://github.com/ClickHouse/ClickHouse/issues/36926): Fix bug in clickhouse-keeper which can lead to corrupted compressed log files in case of small load and restarts. [#36910](https://github.com/ClickHouse/ClickHouse/pull/36910) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 82cd9273680..35d124d1f15 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -694,6 +694,49 @@ auto s = std::string{"Hello"}; **2.** Exception specifiers from C++03 are not used. +**3.** Constructs which have convenient syntactic sugar in modern C++, e.g. + +``` +// Traditional way without syntactic sugar +template ::value, void>> // SFINAE via std::enable_if, usage of ::value +std::pair func(const E & e) // explicitly specified return type +{ + if (elements.count(e)) // .count() membership test + { + // ... + } + + elements.erase( + std::remove_if( + elements.begin(), elements.end(), + [&](const auto x){ + return x == 1; + }), + elements.end()); // remove-erase idiom + + return std::make_pair(1, 2); // create pair via make_pair() +} + +// With syntactic sugar (C++14/17/20) +template +requires std::same_v // SFINAE via C++20 concept, usage of C++14 template alias +auto func(const E & e) // auto return type (C++14) +{ + if (elements.contains(e)) // C++20 .contains membership test + { + // ... + } + + elements.erase_if( + elements, + [&](const auto x){ + return x == 1; + }); // C++20 std::erase_if + + return {1, 2}; // or: return std::pair(1, 2); // create pair via initialization list or value initialization (C++17) +} +``` + ## Platform {#platform} **1.** We write code for a specific platform. diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 7779057bb74..4aba0506c2e 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -14,7 +14,7 @@ Each functional test sends one or multiple queries to the running ClickHouse ser Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from CLickHouse and it is available to general public. -Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. +Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. ### Running a Test Locally {#functional-test-locally} @@ -30,7 +30,7 @@ For more options, see `tests/clickhouse-test --help`. You can simply run all tes ### Adding a New Test -To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. +To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables. diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 2b49603bd0a..f31a78bc1c4 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -127,22 +127,22 @@ After that downloaded archives should be unpacked and installed with installatio LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \ grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) export LATEST_VERSION -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION-amd64.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION-amd64.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION-amd64.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION-amd64.tgz" -tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-common-static-$LATEST_VERSION-amd64.tgz" sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh" -tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION-amd64.tgz" sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh" -tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-server-$LATEST_VERSION-amd64.tgz" sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" sudo /etc/init.d/clickhouse-server start -tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-client-$LATEST_VERSION-amd64.tgz" sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" ``` diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index f21858ccc25..e382bbcddd8 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -9,6 +9,7 @@ ClickHouse can accept and return data in various formats. A format supported for results of a `SELECT`, and to perform `INSERT`s into a file-backed table. The supported formats are: + | Format | Input | Output | |-------------------------------------------------------------------------------------------|-------|--------| | [TabSeparated](#tabseparated) | ✔ | ✔ | @@ -195,7 +196,7 @@ This format is also available under the name `TSVWithNames`. Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row. The first row with names is processed the same way as in `TabSeparatedWithNames` format. If setting [input_format_with_types_use_header](../operations/settings/settings.md#settings-input_format_with_types_use_header) is set to 1, -the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. +the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. This format is also available under the name `TSVWithNamesAndTypes`. @@ -790,7 +791,7 @@ The query `SELECT * FROM UserActivity FORMAT JSONEachRow` returns: Unlike the [JSON](#json) format, there is no substitution of invalid UTF-8 sequences. Values are escaped in the same way as for `JSON`. -:::info +:::info Any set of bytes can be output in the strings. Use the `JSONEachRow` format if you are sure that the data in the table can be formatted as JSON without losing any information. ::: @@ -1414,7 +1415,7 @@ SET format_avro_schema_registry_url = 'http://schema-registry'; SELECT * FROM topic1_stream; ``` -:::warning +:::warning Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it’s value after a restart. Also you can use the `format_avro_schema_registry_url` setting of the `Kafka` table engine. ::: @@ -1631,7 +1632,7 @@ When working with the `Regexp` format, you can use the following settings: - Escaped (similarly to [TSV](#tabseparated)) - Quoted (similarly to [Values](#data-format-values)) - Raw (extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](#tabseparatedraw)) - + - `format_regexp_skip_unmatched` — [UInt8](../sql-reference/data-types/int-uint.md). Defines the need to throw an exeption in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`. **Usage** diff --git a/docs/en/interfaces/postgresql.md b/docs/en/interfaces/postgresql.md new file mode 100644 index 00000000000..a7fd33b38c1 --- /dev/null +++ b/docs/en/interfaces/postgresql.md @@ -0,0 +1,72 @@ +--- +sidebar_position: 20 +sidebar_label: PostgreSQL Interface +--- + +# PostgreSQL Interface + +ClickHouse supports the PostgreSQL wire protocol, which allows you to use Postgres clients to connect to ClickHouse. In a sense, ClickHouse can pretend to be a PostgreSQL instance - allowing you to connect a PostgreSQL client application to ClickHouse that is not already directy supported by ClickHouse (for example, Amazon Redshift). + +To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings#server_configuration_parameters-postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder: + +```xml + + 9005 + +``` + +Startup your ClickHouse server and look for a log message similar to the following that mentions **Listening for PostgreSQL compatibility protocol**: + +```response +{} Application: Listening for PostgreSQL compatibility protocol: 127.0.0.1:9005 +``` + +## Connect psql to ClickHouse + +The following command demonstrates how to connect the PostgreSQL client `psql` to ClickHouse: + +```bash +psql -p [port] -h [hostname] -U [username] [database_name] +``` + +For example: + +```bash +psql -p 9005 -h 127.0.0.1 -U alice default +``` + +:::note +The `psql` client requires a login with a password, so you will not be able connect using the `default` user with no password. Either assign a password to the `default` user, or login as a different user. +::: + +The `psql` client prompts for the password: + +```response +Password for user alice: +psql (14.2, server 22.3.1.1) +WARNING: psql major version 14, server major version 22. + Some psql features might not work. +Type "help" for help. + +default=> +``` + +And that's it! You now have a PostgreSQL client connected to ClickHouse, and all commands and queries are executed on ClickHouse. + +:::caution +The PostgreSQL protocol currently only supports plain-text passwords. +::: + +## Using SSL + +If you have SSL/TLS configured on your ClickHouse instance, then `postgresql_port` will use the same settings (the port is shared for both secure and unsecure clients). + +Each client has their own method of how to connect using SSL. The following command demonstrates how to pass in the certificates and key to securely connect `psql` to ClickHouse: + +```bash +psql "port=9005 host=127.0.0.1 user=alice dbname=default sslcert=/path/to/certificate.pem sslkey=/path/to/key.pem sslrootcert=/path/to/rootcert.pem sslmode=verify-ca" +``` + +View the [PostgreSQL docs](https://jdbc.postgresql.org/documentation/head/ssl-client.html) for more details on their SSL settings. + +[Original article](https://clickhouse.com/docs/en/interfaces/postgresql) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index e4b1fdd3bbb..bd05f3b4ad2 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -13,7 +13,7 @@ Simhash is a hash function, which returns close hash values for close (similar) [Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. -``` sql +```sql halfMD5(par1, ...) ``` @@ -30,11 +30,11 @@ A [UInt64](../../sql-reference/data-types/int-uint.md) data type hash value. **Example** -``` sql +```sql SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS halfMD5hash, toTypeName(halfMD5hash) AS type; ``` -``` text +```response ┌────────halfMD5hash─┬─type───┐ │ 186182704141653334 │ UInt64 │ └────────────────────┴────────┘ @@ -54,7 +54,7 @@ If you want to get the same result as output by the md5sum utility, use lower(he Produces a 64-bit [SipHash](https://131002.net/siphash/) hash value. -``` sql +```sql sipHash64(par1,...) ``` @@ -77,11 +77,11 @@ A [UInt64](../../sql-reference/data-types/int-uint.md) data type hash value. **Example** -``` sql +```sql SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS SipHash, toTypeName(SipHash) AS type; ``` -``` text +```response ┌──────────────SipHash─┬─type───┐ │ 13726873534472839665 │ UInt64 │ └──────────────────────┴────────┘ @@ -93,7 +93,7 @@ Produces a 128-bit [SipHash](https://131002.net/siphash/) hash value. Differs fr **Syntax** -``` sql +```sql sipHash128(par1,...) ``` @@ -111,13 +111,13 @@ Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md). Query: -``` sql +```sql SELECT hex(sipHash128('foo', '\x01', 3)); ``` Result: -``` text +```response ┌─hex(sipHash128('foo', '', 3))────┐ │ 9DE516A64A414D4B1B609415E4523F24 │ └──────────────────────────────────┘ @@ -127,7 +127,7 @@ Result: Produces a 64-bit [CityHash](https://github.com/google/cityhash) hash value. -``` sql +```sql cityHash64(par1,...) ``` @@ -145,11 +145,11 @@ A [UInt64](../../sql-reference/data-types/int-uint.md) data type hash value. Call example: -``` sql +```sql SELECT cityHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS CityHash, toTypeName(CityHash) AS type; ``` -``` text +```response ┌─────────────CityHash─┬─type───┐ │ 12072650598913549138 │ UInt64 │ └──────────────────────┴────────┘ @@ -157,7 +157,7 @@ SELECT cityHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0 The following example shows how to compute the checksum of the entire table with accuracy up to the row order: -``` sql +```sql SELECT groupBitXor(cityHash64(*)) FROM table ``` @@ -177,7 +177,7 @@ Calculates SHA-1, SHA-224, SHA-256, SHA-512 hash from a string and returns the r **Syntax** -``` sql +```sql SHA1('s') ... SHA512('s') @@ -203,24 +203,62 @@ Use the [hex](../functions/encoding-functions.md#hex) function to represent the Query: -``` sql +```sql SELECT hex(SHA1('abc')); ``` Result: -``` text +```response ┌─hex(SHA1('abc'))─────────────────────────┐ │ A9993E364706816ABA3E25717850C26C9CD0D89D │ └──────────────────────────────────────────┘ ``` +## BLAKE3 {#blake3} + +Calculates BLAKE3 hash string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md). + +**Syntax** + +```sql +BLAKE3('s') +``` + +This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust library. The function is rather fast and shows approximately two times faster performance compared to SHA-2, while generating hashes of the same length as SHA-256. + +**Arguments** + +- s - input string for BLAKE3 hash calculation. [String](../data-types/string.md). + +**Return value** + +- BLAKE3 hash as a byte array with type FixedString(32). + +Type: [FixedString](../data-types/fixedstring.md). + +**Example** + +Use function [hex](../functions/encoding-functions.md#hex) to represent the result as a hex-encoded string. + +Query: +```sql +SELECT hex(BLAKE3('ABC')) +``` + +Result: +```sql +┌─hex(BLAKE3('ABC'))───────────────────────────────────────────────┐ +│ D1717274597CF0289694F75D96D444B992A096F1AFD8E7BBFA6EBB1D360FEDFC │ +└──────────────────────────────────────────────────────────────────┘ +``` + ## URLHash(url\[, N\]) {#urlhashurl-n} A fast, decent-quality non-cryptographic hash function for a string obtained from a URL using some type of normalization. `URLHash(s)` – Calculates a hash from a string without one of the trailing symbols `/`,`?` or `#` at the end, if present. `URLHash(s, N)` – Calculates a hash from a string up to the N level in the URL hierarchy, without one of the trailing symbols `/`,`?` or `#` at the end, if present. -Levels are the same as in URLHierarchy. +Levels are the same as in URLHierarchy. ## farmFingerprint64 {#farmfingerprint64} @@ -228,7 +266,7 @@ Levels are the same as in URLHierarchy. Produces a 64-bit [FarmHash](https://github.com/google/farmhash) or Fingerprint value. `farmFingerprint64` is preferred for a stable and portable value. -``` sql +```sql farmFingerprint64(par1, ...) farmHash64(par1, ...) ``` @@ -245,11 +283,11 @@ A [UInt64](../../sql-reference/data-types/int-uint.md) data type hash value. **Example** -``` sql +```sql SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS FarmHash, toTypeName(FarmHash) AS type; ``` -``` text +```response ┌─────────────FarmHash─┬─type───┐ │ 17790458267262532859 │ UInt64 │ └──────────────────────┴────────┘ @@ -261,7 +299,7 @@ Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add97 **Syntax** -``` sql +```sql SELECT javaHash('') ``` @@ -273,13 +311,13 @@ A `Int32` data type hash value. Query: -``` sql +```sql SELECT javaHash('Hello, world!'); ``` Result: -``` text +```response ┌─javaHash('Hello, world!')─┐ │ -1880044555 │ └───────────────────────────┘ @@ -291,7 +329,7 @@ Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add97 **Syntax** -``` sql +```sql javaHashUTF16LE(stringUtf16le) ``` @@ -309,13 +347,13 @@ Correct query with UTF-16LE encoded string. Query: -``` sql +```sql SELECT javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le')); ``` Result: -``` text +```response ┌─javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le'))─┐ │ 3556498 │ └──────────────────────────────────────────────────────────────┘ @@ -325,7 +363,7 @@ Result: Calculates `HiveHash` from a string. -``` sql +```sql SELECT hiveHash('') ``` @@ -341,13 +379,13 @@ Type: `hiveHash`. Query: -``` sql +```sql SELECT hiveHash('Hello, world!'); ``` Result: -``` text +```response ┌─hiveHash('Hello, world!')─┐ │ 267439093 │ └───────────────────────────┘ @@ -357,7 +395,7 @@ Result: Produces a 64-bit [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/) hash value. -``` sql +```sql metroHash64(par1, ...) ``` @@ -371,11 +409,11 @@ A [UInt64](../../sql-reference/data-types/int-uint.md) data type hash value. **Example** -``` sql +```sql SELECT metroHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS MetroHash, toTypeName(MetroHash) AS type; ``` -``` text +```response ┌────────────MetroHash─┬─type───┐ │ 14235658766382344533 │ UInt64 │ └──────────────────────┴────────┘ @@ -391,7 +429,7 @@ For more information, see the link: [JumpConsistentHash](https://arxiv.org/pdf/1 Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value. -``` sql +```sql murmurHash2_32(par1, ...) murmurHash2_64(par1, ...) ``` @@ -407,11 +445,11 @@ Both functions take a variable number of input parameters. Arguments can be any **Example** -``` sql +```sql SELECT murmurHash2_64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS MurmurHash2, toTypeName(MurmurHash2) AS type; ``` -``` text +```response ┌──────────MurmurHash2─┬─type───┐ │ 11832096901709403633 │ UInt64 │ └──────────────────────┴────────┘ @@ -423,7 +461,7 @@ Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash val **Syntax** -``` sql +```sql gccMurmurHash(par1, ...) ``` @@ -441,7 +479,7 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT gccMurmurHash(1, 2, 3) AS res1, gccMurmurHash(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))) AS res2 @@ -449,7 +487,7 @@ SELECT Result: -``` text +```response ┌─────────────────res1─┬────────────────res2─┐ │ 12384823029245979431 │ 1188926775431157506 │ └──────────────────────┴─────────────────────┘ @@ -459,7 +497,7 @@ Result: Produces a [MurmurHash3](https://github.com/aappleby/smhasher) hash value. -``` sql +```sql murmurHash3_32(par1, ...) murmurHash3_64(par1, ...) ``` @@ -475,11 +513,11 @@ Both functions take a variable number of input parameters. Arguments can be any **Example** -``` sql +```sql SELECT murmurHash3_32(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS MurmurHash3, toTypeName(MurmurHash3) AS type; ``` -``` text +```response ┌─MurmurHash3─┬─type───┐ │ 2152717 │ UInt32 │ └─────────────┴────────┘ @@ -491,7 +529,7 @@ Produces a 128-bit [MurmurHash3](https://github.com/aappleby/smhasher) hash valu **Syntax** -``` sql +```sql murmurHash3_128(expr) ``` @@ -509,13 +547,13 @@ Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md). Query: -``` sql +```sql SELECT hex(murmurHash3_128('foo', 'foo', 'foo')); ``` Result: -``` text +```response ┌─hex(murmurHash3_128('foo', 'foo', 'foo'))─┐ │ F8F7AD9B6CD4CF117A71E277E2EC2931 │ └───────────────────────────────────────────┘ @@ -525,7 +563,7 @@ Result: Calculates `xxHash` from a string. It is proposed in two flavors, 32 and 64 bits. -``` sql +```sql SELECT xxHash32('') OR @@ -543,13 +581,13 @@ Type: `xxHash`. Query: -``` sql +```sql SELECT xxHash32('Hello, world!'); ``` Result: -``` text +```response ┌─xxHash32('Hello, world!')─┐ │ 834093149 │ └───────────────────────────┘ @@ -567,7 +605,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql ngramSimHash(string[, ngramsize]) ``` @@ -586,13 +624,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT ngramSimHash('ClickHouse') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 1627567969 │ └────────────┘ @@ -606,7 +644,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql ngramSimHashCaseInsensitive(string[, ngramsize]) ``` @@ -625,13 +663,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT ngramSimHashCaseInsensitive('ClickHouse') AS Hash; ``` Result: -``` text +```response ┌──────Hash─┐ │ 562180645 │ └───────────┘ @@ -645,7 +683,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql ngramSimHashUTF8(string[, ngramsize]) ``` @@ -664,13 +702,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT ngramSimHashUTF8('ClickHouse') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 1628157797 │ └────────────┘ @@ -684,7 +722,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql ngramSimHashCaseInsensitiveUTF8(string[, ngramsize]) ``` @@ -703,13 +741,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT ngramSimHashCaseInsensitiveUTF8('ClickHouse') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 1636742693 │ └────────────┘ @@ -723,7 +761,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql wordShingleSimHash(string[, shinglesize]) ``` @@ -742,13 +780,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT wordShingleSimHash('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 2328277067 │ └────────────┘ @@ -762,7 +800,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql wordShingleSimHashCaseInsensitive(string[, shinglesize]) ``` @@ -781,13 +819,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT wordShingleSimHashCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 2194812424 │ └────────────┘ @@ -801,7 +839,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql wordShingleSimHashUTF8(string[, shinglesize]) ``` @@ -820,13 +858,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT wordShingleSimHashUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 2328277067 │ └────────────┘ @@ -840,7 +878,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) ``` @@ -859,13 +897,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT wordShingleSimHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 2194812424 │ └────────────┘ @@ -879,7 +917,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql ngramMinHash(string[, ngramsize, hashnum]) ``` @@ -899,13 +937,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT ngramMinHash('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (18333312859352735453,9054248444481805918) │ └────────────────────────────────────────────┘ @@ -919,7 +957,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) ``` @@ -939,13 +977,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT ngramMinHashCaseInsensitive('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (2106263556442004574,13203602793651726206) │ └────────────────────────────────────────────┘ @@ -959,7 +997,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql ngramMinHashUTF8(string[, ngramsize, hashnum]) ``` @@ -979,13 +1017,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT ngramMinHashUTF8('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (18333312859352735453,6742163577938632877) │ └────────────────────────────────────────────┘ @@ -999,7 +1037,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) ``` @@ -1019,13 +1057,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT ngramMinHashCaseInsensitiveUTF8('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple───────────────────────────────────────┐ │ (12493625717655877135,13203602793651726206) │ └─────────────────────────────────────────────┘ @@ -1037,7 +1075,7 @@ Splits a ASCII string into n-grams of `ngramsize` symbols and returns the n-gram **Syntax** -``` sql +```sql ngramMinHashArg(string[, ngramsize, hashnum]) ``` @@ -1057,13 +1095,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT ngramMinHashArg('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ous','ick','lic','Hou','kHo','use'),('Hou','lic','ick','ous','ckH','Cli')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1075,7 +1113,7 @@ Splits a ASCII string into n-grams of `ngramsize` symbols and returns the n-gram **Syntax** -``` sql +```sql ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) ``` @@ -1095,13 +1133,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT ngramMinHashArgCaseInsensitive('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ous','ick','lic','kHo','use','Cli'),('kHo','lic','ick','ous','ckH','Hou')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1113,7 +1151,7 @@ Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram **Syntax** -``` sql +```sql ngramMinHashArgUTF8(string[, ngramsize, hashnum]) ``` @@ -1133,13 +1171,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT ngramMinHashArgUTF8('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ous','ick','lic','Hou','kHo','use'),('kHo','Hou','lic','ick','ous','ckH')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1151,7 +1189,7 @@ Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram **Syntax** -``` sql +```sql ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) ``` @@ -1171,13 +1209,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT ngramMinHashArgCaseInsensitiveUTF8('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ckH','ous','ick','lic','kHo','use'),('kHo','lic','ick','ous','ckH','Hou')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1191,7 +1229,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql wordShingleMinHash(string[, shinglesize, hashnum]) ``` @@ -1211,13 +1249,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT wordShingleMinHash('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (16452112859864147620,5844417301642981317) │ └────────────────────────────────────────────┘ @@ -1231,7 +1269,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) ``` @@ -1251,13 +1289,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT wordShingleMinHashCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────┐ │ (3065874883688416519,1634050779997673240) │ └───────────────────────────────────────────┘ @@ -1271,7 +1309,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql wordShingleMinHashUTF8(string[, shinglesize, hashnum]) ``` @@ -1291,13 +1329,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT wordShingleMinHashUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (16452112859864147620,5844417301642981317) │ └────────────────────────────────────────────┘ @@ -1311,7 +1349,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) ``` @@ -1331,13 +1369,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT wordShingleMinHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────┐ │ (3065874883688416519,1634050779997673240) │ └───────────────────────────────────────────┘ @@ -1349,7 +1387,7 @@ Splits a ASCII string into parts (shingles) of `shinglesize` words each and retu **Syntax** -``` sql +```sql wordShingleMinHashArg(string[, shinglesize, hashnum]) ``` @@ -1369,13 +1407,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT wordShingleMinHashArg('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────┐ │ (('OLAP','database','analytical'),('online','oriented','processing')) │ └───────────────────────────────────────────────────────────────────────┘ @@ -1387,7 +1425,7 @@ Splits a ASCII string into parts (shingles) of `shinglesize` words each and retu **Syntax** -``` sql +```sql wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) ``` @@ -1407,13 +1445,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT wordShingleMinHashArgCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────────────────────────────────┐ │ (('queries','database','analytical'),('oriented','processing','DBMS')) │ └────────────────────────────────────────────────────────────────────────┘ @@ -1425,7 +1463,7 @@ Splits a UTF-8 string into parts (shingles) of `shinglesize` words each and retu **Syntax** -``` sql +```sql wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) ``` @@ -1445,13 +1483,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT wordShingleMinHashArgUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────┐ │ (('OLAP','database','analytical'),('online','oriented','processing')) │ └───────────────────────────────────────────────────────────────────────┘ @@ -1463,7 +1501,7 @@ Splits a UTF-8 string into parts (shingles) of `shinglesize` words each and retu **Syntax** -``` sql +```sql wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) ``` @@ -1483,13 +1521,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT wordShingleMinHashArgCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────────────────────────────────┐ │ (('queries','database','analytical'),('oriented','processing','DBMS')) │ └────────────────────────────────────────────────────────────────────────┘ diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index fa52d32729f..a37398786af 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -478,3 +478,17 @@ Result: │ 0 │ └──────────────────────────────────────────────┘ ``` + +Query: + +``` sql +SELECT isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128'); +``` + +Result: + +``` text +┌─isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128')─┐ +│ 0 │ +└────────────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index e9a15995a16..e53ea41d606 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -11,7 +11,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | --------| ----------| | ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported | | expressions involving window functions, e.g. `(count(*) over ()) / 2)` | not supported, wrap in a subquery ([feature request](https://github.com/ClickHouse/ClickHouse/issues/19857)) | -| `WINDOW` clause (`select ... from table window w as (partiton by id)`) | supported | +| `WINDOW` clause (`select ... from table window w as (partition by id)`) | supported | | `ROWS` frame | supported | | `RANGE` frame | supported, the default | | `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead | diff --git a/docs/ru/sql-reference/data-types/int-uint.md b/docs/ru/sql-reference/data-types/int-uint.md index 9e6e3a7fd96..06d6727afa4 100644 --- a/docs/ru/sql-reference/data-types/int-uint.md +++ b/docs/ru/sql-reference/data-types/int-uint.md @@ -31,7 +31,5 @@ sidebar_label: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64 - `UInt16` — \[0 : 65535\] - `UInt32` — \[0 : 4294967295\] - `UInt64` — \[0 : 18446744073709551615\] +- `UInt128` — \[0 : 340282366920938463463374607431768211455\] - `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\] - -`UInt128` пока не реализован. - diff --git a/docs/ru/sql-reference/functions/hash-functions.md b/docs/ru/sql-reference/functions/hash-functions.md index 12091fcbc3f..0065275519b 100644 --- a/docs/ru/sql-reference/functions/hash-functions.md +++ b/docs/ru/sql-reference/functions/hash-functions.md @@ -13,7 +13,7 @@ Simhash – это хеш-функция, которая для близких [Интерпретирует](../../sql-reference/functions/hash-functions.md#type_conversion_functions-reinterpretAsString) все входные параметры как строки и вычисляет хэш [MD5](https://ru.wikipedia.org/wiki/MD5) для каждой из них. Затем объединяет хэши, берет первые 8 байт хэша результирующей строки и интерпретирует их как значение типа `UInt64` с big-endian порядком байтов. -``` sql +```sql halfMD5(par1, ...) ``` @@ -30,11 +30,11 @@ halfMD5(par1, ...) **Пример** -``` sql +```sql SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS halfMD5hash, toTypeName(halfMD5hash) AS type; ``` -``` text +```response ┌────────halfMD5hash─┬─type───┐ │ 186182704141653334 │ UInt64 │ └────────────────────┴────────┘ @@ -54,7 +54,7 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00') Генерирует 64-х битное значение [SipHash](https://131002.net/siphash/). -``` sql +```sql sipHash64(par1,...) ``` @@ -77,11 +77,11 @@ sipHash64(par1,...) **Пример** -``` sql +```sql SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS SipHash, toTypeName(SipHash) AS type; ``` -``` text +```response ┌──────────────SipHash─┬─type───┐ │ 13726873534472839665 │ UInt64 │ └──────────────────────┴────────┘ @@ -93,7 +93,7 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00 **Синтаксис** -``` sql +```sql sipHash128(par1,...) ``` @@ -111,13 +111,13 @@ sipHash128(par1,...) Запрос: -``` sql +```sql SELECT hex(sipHash128('foo', '\x01', 3)); ``` Результат: -``` text +```response ┌─hex(sipHash128('foo', '', 3))────┐ │ 9DE516A64A414D4B1B609415E4523F24 │ └──────────────────────────────────┘ @@ -127,7 +127,7 @@ SELECT hex(sipHash128('foo', '\x01', 3)); Генерирует 64-х битное значение [CityHash](https://github.com/google/cityhash). -``` sql +```sql cityHash64(par1,...) ``` @@ -145,11 +145,11 @@ cityHash64(par1,...) Пример вызова: -``` sql +```sql SELECT cityHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS CityHash, toTypeName(CityHash) AS type; ``` -``` text +```response ┌─────────────CityHash─┬─type───┐ │ 12072650598913549138 │ UInt64 │ └──────────────────────┴────────┘ @@ -157,7 +157,7 @@ SELECT cityHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0 А вот так вы можете вычислить чексумму всей таблицы с точностью до порядка строк: -``` sql +```sql SELECT groupBitXor(cityHash64(*)) FROM table ``` @@ -177,7 +177,7 @@ SELECT groupBitXor(cityHash64(*)) FROM table **Синтаксис** -``` sql +```sql SHA1('s') ... SHA512('s') @@ -203,18 +203,56 @@ SHA512('s') Запрос: -``` sql +```sql SELECT hex(SHA1('abc')); ``` Результат: -``` text +```response ┌─hex(SHA1('abc'))─────────────────────────┐ │ A9993E364706816ABA3E25717850C26C9CD0D89D │ └──────────────────────────────────────────┘ ``` +## BLAKE3 {#blake3} + +Вычисляет BLAKE3 хеш строки и возвращает полученный набор байт в виде [FixedString](../data-types/fixedstring.md). + +**Синтаксис** + +```sql +BLAKE3('s') +``` + +Данная криптографическая функция интегрирована в ClickHouse из Rust-библиотеки. Функция работает сравнительно быстро, показывая в 2 раза более быстрые результаты по сравнению с SHA-2, генерируя хеши аналогичной SHA-256 длины. + +**Параметры** + +- s - входная строка для вычисления хеша BLAKE3. [String](../data-types/string.md). + +**Возвращаемое значение** + +- Хеш BLAKE3 в виде шестнадцатеричной строки, имеющей тип FixedString(32). + +Тип: [FixedString](../data-types/fixedstring.md). + +**Пример** + +Используйте функцию [hex](../functions/encoding-functions.md#hex) для представления результата в виде строки с шестнадцатеричной кодировкой. + +Запрос: +```sql +SELECT hex(BLAKE3('ABC')) +``` + +Результат: +```response +┌─hex(BLAKE3('ABC'))───────────────────────────────────────────────┐ +│ D1717274597CF0289694F75D96D444B992A096F1AFD8E7BBFA6EBB1D360FEDFC │ +└──────────────────────────────────────────────────────────────────┘ +``` + ## URLHash(url\[, N\]) {#urlhashurl-n} Быстрая не криптографическая хэш-функция неплохого качества для строки, полученной из URL путём некоторой нормализации. @@ -228,7 +266,7 @@ SELECT hex(SHA1('abc')); Создает 64-битное значение [FarmHash](https://github.com/google/farmhash), независимое от платформы (архитектуры сервера), что важно, если значения сохраняются или используются для разбиения данных на группы. -``` sql +```sql farmFingerprint64(par1, ...) farmHash64(par1, ...) ``` @@ -245,11 +283,11 @@ farmHash64(par1, ...) **Пример** -``` sql +```sql SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS FarmHash, toTypeName(FarmHash) AS type; ``` -``` text +```response ┌─────────────FarmHash─┬─type───┐ │ 17790458267262532859 │ UInt64 │ └──────────────────────┴────────┘ @@ -259,7 +297,7 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0 Вычисляет [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) от строки. `JavaHash` не отличается ни скоростью, ни качеством, поэтому эту функцию следует считать устаревшей. Используйте эту функцию, если вам необходимо получить значение хэша по такому же алгоритму. -``` sql +```sql SELECT javaHash('') ``` @@ -273,13 +311,13 @@ SELECT javaHash('') Запрос: -``` sql +```sql SELECT javaHash('Hello, world!'); ``` Результат: -``` text +```response ┌─javaHash('Hello, world!')─┐ │ -1880044555 │ └───────────────────────────┘ @@ -291,7 +329,7 @@ SELECT javaHash('Hello, world!'); **Синтаксис** -``` sql +```sql javaHashUTF16LE(stringUtf16le) ``` @@ -311,13 +349,13 @@ javaHashUTF16LE(stringUtf16le) Запрос: -``` sql +```sql SELECT javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le')); ``` Результат: -``` text +```response ┌─javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le'))─┐ │ 3556498 │ └──────────────────────────────────────────────────────────────┘ @@ -327,7 +365,7 @@ SELECT javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le')); Вычисляет `HiveHash` от строки. -``` sql +```sql SELECT hiveHash('') ``` @@ -343,13 +381,13 @@ SELECT hiveHash('') Запрос: -``` sql +```sql SELECT hiveHash('Hello, world!'); ``` Результат: -``` text +```response ┌─hiveHash('Hello, world!')─┐ │ 267439093 │ └───────────────────────────┘ @@ -359,7 +397,7 @@ SELECT hiveHash('Hello, world!'); Генерирует 64-х битное значение [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/). -``` sql +```sql metroHash64(par1, ...) ``` @@ -373,11 +411,11 @@ metroHash64(par1, ...) **Пример** -``` sql +```sql SELECT metroHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS MetroHash, toTypeName(MetroHash) AS type; ``` -``` text +```response ┌────────────MetroHash─┬─type───┐ │ 14235658766382344533 │ UInt64 │ └──────────────────────┴────────┘ @@ -393,7 +431,7 @@ SELECT metroHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00: Генерирует значение [MurmurHash2](https://github.com/aappleby/smhasher). -``` sql +```sql murmurHash2_32(par1, ...) murmurHash2_64(par1, ...) ``` @@ -409,11 +447,11 @@ murmurHash2_64(par1, ...) **Пример** -``` sql +```sql SELECT murmurHash2_64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS MurmurHash2, toTypeName(MurmurHash2) AS type; ``` -``` text +```response ┌──────────MurmurHash2─┬─type───┐ │ 11832096901709403633 │ UInt64 │ └──────────────────────┴────────┘ @@ -425,7 +463,7 @@ SELECT murmurHash2_64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23: **Синтаксис** -``` sql +```sql gccMurmurHash(par1, ...); ``` @@ -443,7 +481,7 @@ gccMurmurHash(par1, ...); Запрос: -``` sql +```sql SELECT gccMurmurHash(1, 2, 3) AS res1, gccMurmurHash(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))) AS res2 @@ -451,7 +489,7 @@ SELECT Результат: -``` text +```response ┌─────────────────res1─┬────────────────res2─┐ │ 12384823029245979431 │ 1188926775431157506 │ └──────────────────────┴─────────────────────┘ @@ -461,7 +499,7 @@ SELECT Генерирует значение [MurmurHash3](https://github.com/aappleby/smhasher). -``` sql +```sql murmurHash3_32(par1, ...) murmurHash3_64(par1, ...) ``` @@ -477,11 +515,11 @@ murmurHash3_64(par1, ...) **Пример** -``` sql +```sql SELECT murmurHash3_32(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS MurmurHash3, toTypeName(MurmurHash3) AS type; ``` -``` text +```response ┌─MurmurHash3─┬─type───┐ │ 2152717 │ UInt32 │ └─────────────┴────────┘ @@ -493,7 +531,7 @@ SELECT murmurHash3_32(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23: **Синтаксис** -``` sql +```sql murmurHash3_128(expr) ``` @@ -511,13 +549,13 @@ murmurHash3_128(expr) Запрос: -``` sql +```sql SELECT hex(murmurHash3_128('foo', 'foo', 'foo')); ``` Результат: -``` text +```response ┌─hex(murmurHash3_128('foo', 'foo', 'foo'))─┐ │ F8F7AD9B6CD4CF117A71E277E2EC2931 │ └───────────────────────────────────────────┘ @@ -527,7 +565,7 @@ SELECT hex(murmurHash3_128('foo', 'foo', 'foo')); Вычисляет `xxHash` от строки. Предлагается в двух вариантах: 32 и 64 бита. -``` sql +```sql SELECT xxHash32('') OR @@ -545,13 +583,13 @@ SELECT xxHash64('') Запрос: -``` sql +```sql SELECT xxHash32('Hello, world!'); ``` Результат: -``` text +```response ┌─xxHash32('Hello, world!')─┐ │ 834093149 │ └───────────────────────────┘ @@ -569,7 +607,7 @@ SELECT xxHash32('Hello, world!'); **Синтаксис** -``` sql +```sql ngramSimHash(string[, ngramsize]) ``` @@ -588,13 +626,13 @@ ngramSimHash(string[, ngramsize]) Запрос: -``` sql +```sql SELECT ngramSimHash('ClickHouse') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 1627567969 │ └────────────┘ @@ -608,7 +646,7 @@ SELECT ngramSimHash('ClickHouse') AS Hash; **Синтаксис** -``` sql +```sql ngramSimHashCaseInsensitive(string[, ngramsize]) ``` @@ -627,13 +665,13 @@ ngramSimHashCaseInsensitive(string[, ngramsize]) Запрос: -``` sql +```sql SELECT ngramSimHashCaseInsensitive('ClickHouse') AS Hash; ``` Результат: -``` text +```response ┌──────Hash─┐ │ 562180645 │ └───────────┘ @@ -647,7 +685,7 @@ SELECT ngramSimHashCaseInsensitive('ClickHouse') AS Hash; **Синтаксис** -``` sql +```sql ngramSimHashUTF8(string[, ngramsize]) ``` @@ -666,13 +704,13 @@ ngramSimHashUTF8(string[, ngramsize]) Запрос: -``` sql +```sql SELECT ngramSimHashUTF8('ClickHouse') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 1628157797 │ └────────────┘ @@ -686,7 +724,7 @@ SELECT ngramSimHashUTF8('ClickHouse') AS Hash; **Синтаксис** -``` sql +```sql ngramSimHashCaseInsensitiveUTF8(string[, ngramsize]) ``` @@ -705,13 +743,13 @@ ngramSimHashCaseInsensitiveUTF8(string[, ngramsize]) Запрос: -``` sql +```sql SELECT ngramSimHashCaseInsensitiveUTF8('ClickHouse') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 1636742693 │ └────────────┘ @@ -725,7 +763,7 @@ SELECT ngramSimHashCaseInsensitiveUTF8('ClickHouse') AS Hash; **Синтаксис** -``` sql +```sql wordShingleSimHash(string[, shinglesize]) ``` @@ -744,13 +782,13 @@ wordShingleSimHash(string[, shinglesize]) Запрос: -``` sql +```sql SELECT wordShingleSimHash('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 2328277067 │ └────────────┘ @@ -764,7 +802,7 @@ SELECT wordShingleSimHash('ClickHouse® is a column-oriented database management **Синтаксис** -``` sql +```sql wordShingleSimHashCaseInsensitive(string[, shinglesize]) ``` @@ -783,13 +821,13 @@ wordShingleSimHashCaseInsensitive(string[, shinglesize]) Запрос: -``` sql +```sql SELECT wordShingleSimHashCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 2194812424 │ └────────────┘ @@ -803,7 +841,7 @@ SELECT wordShingleSimHashCaseInsensitive('ClickHouse® is a column-oriented data **Синтаксис** -``` sql +```sql wordShingleSimHashUTF8(string[, shinglesize]) ``` @@ -822,13 +860,13 @@ wordShingleSimHashUTF8(string[, shinglesize]) Запрос: -``` sql +```sql SELECT wordShingleSimHashUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 2328277067 │ └────────────┘ @@ -842,7 +880,7 @@ SELECT wordShingleSimHashUTF8('ClickHouse® is a column-oriented database manage **Синтаксис** -``` sql +```sql wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) ``` @@ -861,13 +899,13 @@ wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) Запрос: -``` sql +```sql SELECT wordShingleSimHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 2194812424 │ └────────────┘ @@ -881,7 +919,7 @@ SELECT wordShingleSimHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented **Синтаксис** -``` sql +```sql ngramMinHash(string[, ngramsize, hashnum]) ``` @@ -901,13 +939,13 @@ ngramMinHash(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHash('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (18333312859352735453,9054248444481805918) │ └────────────────────────────────────────────┘ @@ -921,7 +959,7 @@ SELECT ngramMinHash('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) ``` @@ -941,13 +979,13 @@ ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashCaseInsensitive('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (2106263556442004574,13203602793651726206) │ └────────────────────────────────────────────┘ @@ -960,7 +998,7 @@ SELECT ngramMinHashCaseInsensitive('ClickHouse') AS Tuple; Может быть использована для проверки двух строк на схожесть вместе с функцией [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). Если для двух строк минимальные или максимальные хеши одинаковы, мы считаем, что эти строки совпадают. **Синтаксис** -``` sql +```sql ngramMinHashUTF8(string[, ngramsize, hashnum]) ``` @@ -980,13 +1018,13 @@ ngramMinHashUTF8(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashUTF8('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (18333312859352735453,6742163577938632877) │ └────────────────────────────────────────────┘ @@ -1000,7 +1038,7 @@ SELECT ngramMinHashUTF8('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) ``` @@ -1020,13 +1058,13 @@ ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashCaseInsensitiveUTF8('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple───────────────────────────────────────┐ │ (12493625717655877135,13203602793651726206) │ └─────────────────────────────────────────────┘ @@ -1038,7 +1076,7 @@ SELECT ngramMinHashCaseInsensitiveUTF8('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql ngramMinHashArg(string[, ngramsize, hashnum]) ``` @@ -1058,13 +1096,13 @@ ngramMinHashArg(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashArg('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ous','ick','lic','Hou','kHo','use'),('Hou','lic','ick','ous','ckH','Cli')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1076,7 +1114,7 @@ SELECT ngramMinHashArg('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) ``` @@ -1096,13 +1134,13 @@ ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashArgCaseInsensitive('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ous','ick','lic','kHo','use','Cli'),('kHo','lic','ick','ous','ckH','Hou')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1114,7 +1152,7 @@ SELECT ngramMinHashArgCaseInsensitive('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql ngramMinHashArgUTF8(string[, ngramsize, hashnum]) ``` @@ -1134,13 +1172,13 @@ ngramMinHashArgUTF8(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashArgUTF8('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ous','ick','lic','Hou','kHo','use'),('kHo','Hou','lic','ick','ous','ckH')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1152,7 +1190,7 @@ SELECT ngramMinHashArgUTF8('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) ``` @@ -1172,13 +1210,13 @@ ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashArgCaseInsensitiveUTF8('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ckH','ous','ick','lic','kHo','use'),('kHo','lic','ick','ous','ckH','Hou')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1192,7 +1230,7 @@ SELECT ngramMinHashArgCaseInsensitiveUTF8('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql wordShingleMinHash(string[, shinglesize, hashnum]) ``` @@ -1212,13 +1250,13 @@ wordShingleMinHash(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHash('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (16452112859864147620,5844417301642981317) │ └────────────────────────────────────────────┘ @@ -1232,7 +1270,7 @@ SELECT wordShingleMinHash('ClickHouse® is a column-oriented database management **Синтаксис** -``` sql +```sql wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) ``` @@ -1252,13 +1290,13 @@ wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────┐ │ (3065874883688416519,1634050779997673240) │ └───────────────────────────────────────────┘ @@ -1272,7 +1310,7 @@ SELECT wordShingleMinHashCaseInsensitive('ClickHouse® is a column-oriented data **Синтаксис** -``` sql +```sql wordShingleMinHashUTF8(string[, shinglesize, hashnum]) ``` @@ -1292,13 +1330,13 @@ wordShingleMinHashUTF8(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (16452112859864147620,5844417301642981317) │ └────────────────────────────────────────────┘ @@ -1312,7 +1350,7 @@ SELECT wordShingleMinHashUTF8('ClickHouse® is a column-oriented database manage **Синтаксис** -``` sql +```sql wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) ``` @@ -1332,13 +1370,13 @@ wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────┐ │ (3065874883688416519,1634050779997673240) │ └───────────────────────────────────────────┘ @@ -1350,7 +1388,7 @@ SELECT wordShingleMinHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented **Синтаксис** -``` sql +```sql wordShingleMinHashArg(string[, shinglesize, hashnum]) ``` @@ -1370,13 +1408,13 @@ wordShingleMinHashArg(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashArg('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────┐ │ (('OLAP','database','analytical'),('online','oriented','processing')) │ └───────────────────────────────────────────────────────────────────────┘ @@ -1388,7 +1426,7 @@ SELECT wordShingleMinHashArg('ClickHouse® is a column-oriented database managem **Синтаксис** -``` sql +```sql wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) ``` @@ -1408,13 +1446,13 @@ wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashArgCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────────────────────────────────┐ │ (('queries','database','analytical'),('oriented','processing','DBMS')) │ └────────────────────────────────────────────────────────────────────────┘ @@ -1426,7 +1464,7 @@ SELECT wordShingleMinHashArgCaseInsensitive('ClickHouse® is a column-oriented d **Синтаксис** -``` sql +```sql wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) ``` @@ -1446,13 +1484,13 @@ wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashArgUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────┐ │ (('OLAP','database','analytical'),('online','oriented','processing')) │ └───────────────────────────────────────────────────────────────────────┘ @@ -1464,7 +1502,7 @@ SELECT wordShingleMinHashArgUTF8('ClickHouse® is a column-oriented database man **Синтаксис** -``` sql +```sql wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) ``` @@ -1484,13 +1522,13 @@ wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashArgCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────────────────────────────────┐ │ (('queries','database','analytical'),('oriented','processing','DBMS')) │ └────────────────────────────────────────────────────────────────────────┘ diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md b/docs/ru/sql-reference/functions/ip-address-functions.md index f18ea08b633..428fd3dff31 100644 --- a/docs/ru/sql-reference/functions/ip-address-functions.md +++ b/docs/ru/sql-reference/functions/ip-address-functions.md @@ -446,3 +446,17 @@ SELECT isIPAddressInRange('127.0.0.1', 'ffff::/16'); │ 0 │ └──────────────────────────────────────────────┘ ``` + +Запрос: + +``` sql +SELECT isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128'); +``` + +Результат: + +``` text +┌─isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128')─┐ +│ 0 │ +└────────────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/tools/build.py b/docs/tools/build.py index b0d62e68b7f..f084a8e5c0c 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -157,6 +157,7 @@ def build(args): if not args.skip_website: website.process_benchmark_results(args) + website.minify_website(args) redirects.build_static_redirects(args) diff --git a/docs/tools/website.py b/docs/tools/website.py index 21fd636e1cb..f73da9bd3c4 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -1,7 +1,10 @@ +import hashlib import json import logging import os import shutil +import subprocess + import bs4 import util @@ -178,6 +181,59 @@ def build_website(args): f.write(content.encode("utf-8")) +def get_css_in(args): + return [ + f"'{args.website_dir}/css/bootstrap.css'", + f"'{args.website_dir}/css/docsearch.css'", + f"'{args.website_dir}/css/base.css'", + f"'{args.website_dir}/css/blog.css'", + f"'{args.website_dir}/css/docs.css'", + f"'{args.website_dir}/css/highlight.css'", + f"'{args.website_dir}/css/main.css'", + ] + + +def get_js_in(args): + return [ + f"'{args.website_dir}/js/jquery.js'", + f"'{args.website_dir}/js/popper.js'", + f"'{args.website_dir}/js/bootstrap.js'", + f"'{args.website_dir}/js/sentry.js'", + f"'{args.website_dir}/js/base.js'", + f"'{args.website_dir}/js/index.js'", + f"'{args.website_dir}/js/docsearch.js'", + f"'{args.website_dir}/js/docs.js'", + f"'{args.website_dir}/js/main.js'", + ] + + +def minify_website(args): + css_in = " ".join(get_css_in(args)) + css_out = f"{args.output_dir}/docs/css/base.css" + os.makedirs(f"{args.output_dir}/docs/css") + + command = f"cat {css_in}" + output = subprocess.check_output(command, shell=True) + with open(css_out, "wb+") as f: + f.write(output) + + with open(css_out, "rb") as f: + css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8] + + js_in = " ".join(get_js_in(args)) + js_out = f"{args.output_dir}/docs/js/base.js" + os.makedirs(f"{args.output_dir}/docs/js") + + command = f"cat {js_in}" + output = subprocess.check_output(command, shell=True) + with open(js_out, "wb+") as f: + f.write(output) + + with open(js_out, "rb") as f: + js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8] + logging.info(js_digest) + + def process_benchmark_results(args): benchmark_root = os.path.join(args.website_dir, "benchmark") required_keys = { diff --git a/docs/zh/sql-reference/functions/arithmetic-functions.md b/docs/zh/sql-reference/functions/arithmetic-functions.md index 7af0df084f3..15bec0d2107 100644 --- a/docs/zh/sql-reference/functions/arithmetic-functions.md +++ b/docs/zh/sql-reference/functions/arithmetic-functions.md @@ -80,4 +80,78 @@ SELECT toTypeName(0), toTypeName(0 + 0), toTypeName(0 + 0 + 0), toTypeName(0 + 0 返回数值的最小公倍数。 除以零或将最小负数除以-1时抛出异常。 +## max2 {#max2} + +比较两个值并返回最大值。返回值转换为[Float64](../../sql-reference/data-types/float.md)。 + +**语法** + +```sql +max2(value1, value2) +``` + +**参数** + +- `value1` — 第一个值,类型为[Int/UInt](../../sql-reference/data-types/int-uint.md)或[Float](../../sql-reference/data-types/float.md)。 +- `value2` — 第二个值,类型为[Int/UInt](../../sql-reference/data-types/int-uint.md)或[Float](../../sql-reference/data-types/float.md)。 + +**返回值** + +- 两个值中的最大值。 + +类型: [Float](../../sql-reference/data-types/float.md)。 + +**示例** + +查询语句: + +```sql +SELECT max2(-1, 2); +``` + +结果: + +```text +┌─max2(-1, 2)─┐ +│ 2 │ +└─────────────┘ +``` + +## min2 {#min2} + +比较两个值并返回最小值。返回值类型转换为[Float64](../../sql-reference/data-types/float.md)。 + +**语法** + +```sql +max2(value1, value2) +``` + +**参数** + +- `value1` — 第一个值,类型为[Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md)。 +- `value2` — 第二个值,类型为[Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md)。 + +**返回值** + +- 两个值中的最小值。 + +类型: [Float](../../sql-reference/data-types/float.md)。 + +**示例** + +查询语句: + +```sql +SELECT min2(-1, 2); +``` + +结果: + +```text +┌─min2(-1, 2)─┐ +│ -1 │ +└─────────────┘ +``` + [来源文章](https://clickhouse.com/docs/en/query_language/functions/arithmetic_functions/) diff --git a/docs/zh/sql-reference/functions/array-functions.md b/docs/zh/sql-reference/functions/array-functions.md index 6ec6d8fe352..0d4f51e1ddc 100644 --- a/docs/zh/sql-reference/functions/array-functions.md +++ b/docs/zh/sql-reference/functions/array-functions.md @@ -1,16 +1,92 @@ # 数组函数 {#shu-zu-han-shu} -## empty {#empty} +## empty {#empty函数} -对于空数组返回1,对于非空数组返回0。 -结果类型是UInt8。 -该函数也适用于字符串。 +检查输入的数组是否为空。 -## notEmpty {#notempty} +**语法** -对于空数组返回0,对于非空数组返回1。 -结果类型是UInt8。 -该函数也适用于字符串。 +``` sql +empty([x]) +``` + +如果一个数组中不包含任何元素,则此数组为空数组。 + +:::注意 +可以通过启用[optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns)配置进行优化。设置`optimize_functions_to_subcolumns = 1`后,函数通过读取[size0](../../sql-reference/data-types/array.md#array-size)子列获取结果,不在读取和处理整个数组列,查询语句`SELECT empty(arr) FROM TABLE;`将转化为`SELECT arr.size0 = 0 FROM TABLE;`。 +::: + +此函数也适用于[strings](string-functions.md#empty)或[UUID](uuid-functions.md#empty)。 + +**参数** + +- `[x]` — 输入的数组,类型为[数组](../data-types/array.md)。 + +**返回值** + +- 对于空数组返回`1`,对于非空数组返回`0`。 + +类型: [UInt8](../data-types/int-uint.md)。 + +**示例** + +查询语句: + +```sql +SELECT empty([]); +``` + +结果: + +```text +┌─empty(array())─┐ +│ 1 │ +└────────────────┘ +``` + +## notEmpty {#notempty函数} + +检测输入的数组是否非空。 + +**语法** + +``` sql +notEmpty([x]) +``` + +如果一个数组至少包含一个元素,则此数组为非空数组。 + +:::注意 +可以通过启用[optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns)配置进行优化。设置`optimize_functions_to_subcolumns = 1`后,函数通过读取[size0](../../sql-reference/data-types/array.md#array-size)子列获取结果,不在读取和处理整个数组列,查询语句`SELECT notEmpty(arr) FROM TABLE;`将转化为`SELECT arr.size0 != 0 FROM TABLE;`。 +::: + +此函数也适用于[strings](string-functions.md#empty)或[UUID](uuid-functions.md#empty)。 + +**参数** + +- `[x]` — 输入的数组,类型为[数组](../data-types/array.md)。 + +**返回值** + +- 对于空数组返回`0`,对于非空数组返回`1`。 + +类型: [UInt8](../data-types/int-uint.md). + +**示例** + +查询语句: + +```sql +SELECT notEmpty([1,2]); +``` + +结果: + +```text +┌─notEmpty([1, 2])─┐ +│ 1 │ +└──────────────────┘ +``` ## length {#array_functions-length} @@ -18,13 +94,15 @@ 结果类型是UInt64。 该函数也适用于字符串。 -## emptyArrayUInt8,emptyArrayUInt16,emptyArrayUInt32,emptyArrayUInt64 {#emptyarrayuint8-emptyarrayuint16-emptyarrayuint32-emptyarrayuint64} +可以通过启用[optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns)配置进行优化。设置`optimize_functions_to_subcolumns = 1`后,函数通过读取[size0](../../sql-reference/data-types/array.md#array-size)子列获取结果,不在读取和处理整个数组列,查询语句`SELECT length(arr) FROM table`将转化为`SELECT arr.size0 FROM TABLE`。 -## emptyArrayInt8,emptyArrayInt16,emptyArrayInt32,emptyArrayInt64 {#emptyarrayint8-emptyarrayint16-emptyarrayint32-emptyarrayint64} +## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64 {#emptyarrayuint8-emptyarrayuint16-emptyarrayuint32-emptyarrayuint64} -## emptyArrayFloat32,emptyArrayFloat64 {#emptyarrayfloat32-emptyarrayfloat64} +## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64 {#emptyarrayint8-emptyarrayint16-emptyarrayint32-emptyarrayint64} -## emptyArrayDate,emptyArrayDateTime {#emptyarraydate-emptyarraydatetime} +## emptyArrayFloat32, emptyArrayFloat64 {#emptyarrayfloat32-emptyarrayfloat64} + +## emptyArrayDate, emptyArrayDateTime {#emptyarraydate-emptyarraydatetime} ## emptyArrayString {#emptyarraystring} @@ -34,10 +112,43 @@ 接受一个空数组并返回一个仅包含一个默认值元素的数组。 -## range(N) {#rangen} +## range(end), range(\[start, \] end \[, step\]) {#range} -返回从0到N-1的数字数组。 -以防万一,如果在数据块中创建总长度超过100,000,000个元素的数组,则抛出异常。 +返回一个以`step`作为增量步长的从`start`到`end - 1`的`UInt`类型数字数组。 + +**语法** +``` sql +range([start, ] end [, step]) +``` + +**参数** + +- `start` — 数组的第一个元素。可选项,如果设置了`step`时同样需要`start`,默认值为:0,类型为[UInt](../data-types/int-uint.md)。 +- `end` — 计数到`end`结束,但不包括`end`,必填项,类型为[UInt](../data-types/int-uint.md)。 +- `step` — 确定数组中每个元素之间的增量步长。可选项,默认值为:1,类型为[UInt](../data-types/int-uint.md)。 + +**返回值** + +- 以`step`作为增量步长的从`start`到`end - 1`的`UInt`类型数字数组。 + +**注意事项** + +- 所有参数必须是正值:`start`、`end`、`step`,类型均为`UInt`,结果数组的元素与此相同。 +- 如果查询结果的数组总长度超过[function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block)指定的元素数,将会抛出异常。 + + +**示例** + +查询语句: +``` sql +SELECT range(5), range(1, 5), range(1, 5, 2); +``` +结果: +```txt +┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┐ +│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ +└─────────────┴─────────────┴────────────────┘ +``` ## array(x1, …), operator \[x1, …\] {#arrayx1-operator-x1} @@ -49,7 +160,9 @@ 合并参数中传递的所有数组。 - arrayConcat(arrays) +``` sql +arrayConcat(arrays) +``` **参数** @@ -62,9 +175,11 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res ``` - ┌─res───────────┐ - │ [1,2,3,4,5,6] │ - └───────────────┘ +``` text +┌─res───────────┐ +│ [1,2,3,4,5,6] │ +└───────────────┘ +``` ## arrayElement(arr,n),运算符arr\[n\] {#arrayelementarr-n-operator-arrn} @@ -156,22 +271,76 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res `SELECT hasAll([[1, 2], [3, 4]], [[1, 2], [1, 2]])` 返回 `1`. +## hasSubstr {#hassubstr} + +检查 array2 的所有元素是否以相同的顺序出现在 array1 中。当且仅当 `array1 = prefix + array2 + suffix`时,该函数将返回 1。 + +``` sql +hasSubstr(array1, array2) +``` + +换句话说,函数将检查 `array2` 的所有元素是否都包含在 `array1` 中,就像 `hasAll` 函数一样。此外,它将检查元素在“array1”和“array2”中的出现顺序是否相同。 + +举例: +- `hasSubstr([1,2,3,4], [2,3])`返回`1`。然而`hasSubstr([1,2,3,4], [3,2])`将返回`0`。 +- `hasSubstr([1,2,3,4], [1,2,3])`返回`1`。然而`hasSubstr([1,2,3,4], [1,2,4])`将返回`0`。 + +**参数** + +- `array1` – 具有一组元素的任何类型数组。 +- `array2` – 具有一组元素的任何类型数组。 + +**返回值** + +- 如果`array1`中包含`array2`,返回`1`。 +- 其他情况返回`0`。 + +**特殊属性** + +- 当`array2`为空数组时,函数将返回`1`. +- `Null` 作为值处理。换句话说,`hasSubstr([1, 2, NULL, 3, 4], [2,3])` 将返回 `0`。但是,`hasSubstr([1, 2, NULL, 3, 4], [2,NULL,3])` 将返回 `1`。 +- 两个数组中**值的顺序**会影响函数结果。 + +**示例** + +`SELECT hasSubstr([], [])`返回1。 + +`SELECT hasSubstr([1, Null], [Null])`返回1。 + +`SELECT hasSubstr([1.0, 2, 3, 4], [1, 3])`返回0。 + +`SELECT hasSubstr(['a', 'b'], ['a'])`返回1。 + +`SELECT hasSubstr(['a', 'b' , 'c'], ['a', 'b'])`返回1。 + +`SELECT hasSubstr(['a', 'b' , 'c'], ['a', 'c'])`返回0。 + +`SELECT hasSubstr([[1, 2], [3, 4], [5, 6]], [[1, 2], [3, 4]])`返回1。 + ## indexOf(arr,x) {#indexofarr-x} 返回数组中第一个’x’元素的索引(从1开始),如果’x’元素不存在在数组中,则返回0。 示例: - :) SELECT indexOf([1,3,NULL,NULL],NULL) +``` sql +SELECT indexOf([1, 3, NULL, NULL], NULL) +``` - SELECT indexOf([1, 3, NULL, NULL], NULL) - - ┌─indexOf([1, 3, NULL, NULL], NULL)─┐ - │ 3 │ - └───────────────────────────────────┘ +``` text +┌─indexOf([1, 3, NULL, NULL], NULL)─┐ +│ 3 │ +└───────────────────────────────────┘ +``` 设置为«NULL»的元素将作为普通的元素值处理。 +## arrayCount(\[func,\] arr1, …) {#array-count} + +`func`将arr数组作为参数,其返回结果为非零值的数量。如果未指定“func”,则返回数组中非零元素的数量。 + +请注意,`arrayCount`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您可以将 lambda 函数作为第一个参数传递给它。 + ## countEqual(arr,x) {#countequalarr-x} 返回数组中等于x的元素的个数。相当于arrayCount(elem - \> elem = x,arr)。 @@ -180,11 +349,15 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res 示例: - SELECT countEqual([1, 2, NULL, NULL], NULL) +``` sql +SELECT countEqual([1, 2, NULL, NULL], NULL) +``` - ┌─countEqual([1, 2, NULL, NULL], NULL)─┐ - │ 2 │ - └──────────────────────────────────────┘ +``` text +┌─countEqual([1, 2, NULL, NULL], NULL)─┐ +│ 2 │ +└──────────────────────────────────────┘ +``` ## arrayEnumerate(arr) {#array_functions-arrayenumerate} @@ -204,9 +377,11 @@ WHERE CounterID = 160656 LIMIT 10 ``` - ┌─Reaches─┬──Hits─┐ - │ 95606 │ 31406 │ - └─────────┴───────┘ +``` text +┌─Reaches─┬──Hits─┐ +│ 95606 │ 31406 │ +└─────────┴───────┘ +``` 在此示例中,Reaches是转换次数(应用ARRAY JOIN后接收的字符串),Hits是浏览量(ARRAY JOIN之前的字符串)。在这种特殊情况下,您可以更轻松地获得相同的结果: @@ -218,9 +393,11 @@ FROM test.hits WHERE (CounterID = 160656) AND notEmpty(GoalsReached) ``` - ┌─Reaches─┬──Hits─┐ - │ 95606 │ 31406 │ - └─────────┴───────┘ +``` text +┌─Reaches─┬──Hits─┐ +│ 95606 │ 31406 │ +└─────────┴───────┘ +``` 此功能也可用于高阶函数。例如,您可以使用它来获取与条件匹配的元素的数组索引。 @@ -248,18 +425,20 @@ ORDER BY Reaches DESC LIMIT 10 ``` - ┌──GoalID─┬─Reaches─┬─Visits─┐ - │ 53225 │ 3214 │ 1097 │ - │ 2825062 │ 3188 │ 1097 │ - │ 56600 │ 2803 │ 488 │ - │ 1989037 │ 2401 │ 365 │ - │ 2830064 │ 2396 │ 910 │ - │ 1113562 │ 2372 │ 373 │ - │ 3270895 │ 2262 │ 812 │ - │ 1084657 │ 2262 │ 345 │ - │ 56599 │ 2260 │ 799 │ - │ 3271094 │ 2256 │ 812 │ - └─────────┴─────────┴────────┘ +``` text +┌──GoalID─┬─Reaches─┬─Visits─┐ +│ 53225 │ 3214 │ 1097 │ +│ 2825062 │ 3188 │ 1097 │ +│ 56600 │ 2803 │ 488 │ +│ 1989037 │ 2401 │ 365 │ +│ 2830064 │ 2396 │ 910 │ +│ 1113562 │ 2372 │ 373 │ +│ 3270895 │ 2262 │ 812 │ +│ 1084657 │ 2262 │ 345 │ +│ 56599 │ 2260 │ 799 │ +│ 3271094 │ 2256 │ 812 │ +└─────────┴─────────┴────────┘ +``` 在此示例中,每个GoalID都计算转换次数(目标嵌套数据结构中的每个元素都是达到的目标,我们称之为转换)和会话数。如果没有ARRAY JOIN,我们会将会话数计为总和(Sign)。但在这种特殊情况下,行乘以嵌套的Goals结构,因此为了在此之后计算每个会话一次,我们将一个条件应用于arrayEnumerateUniq(Goals.ID)函数的值。 @@ -269,9 +448,11 @@ arrayEnumerateUniq函数可以使用与参数大小相同的多个数组。在 SELECT arrayEnumerateUniq([1, 1, 1, 2, 2, 2], [1, 1, 2, 1, 1, 2]) AS res ``` - ┌─res───────────┐ - │ [1,2,1,1,2,1] │ - └───────────────┘ +``` text +┌─res───────────┐ +│ [1,2,1,1,2,1] │ +└───────────────┘ +``` 当使用带有嵌套数据结构的ARRAY JOIN并在此结构中跨多个元素进一步聚合时,这是必需的。 @@ -291,10 +472,11 @@ SELECT arrayEnumerateUniq([1, 1, 1, 2, 2, 2], [1, 1, 2, 1, 1, 2]) AS res SELECT arrayPopBack([1, 2, 3]) AS res ``` - ┌─res───┐ - │ [1,2] │ - └───────┘ - +``` text +┌─res───┐ +│ [1,2] │ +└───────┘ +``` ## arrayPopFront {#arraypopfront} 从数组中删除第一项。 @@ -311,15 +493,19 @@ SELECT arrayPopBack([1, 2, 3]) AS res SELECT arrayPopFront([1, 2, 3]) AS res ``` - ┌─res───┐ - │ [2,3] │ - └───────┘ +``` text +┌─res───┐ +│ [2,3] │ +└───────┘ +``` ## arrayPushBack {#arraypushback} 添加一个元素到数组的末尾。 - arrayPushBack(array, single_value) +``` sql +arrayPushBack(array, single_value) +``` **参数** @@ -332,15 +518,19 @@ SELECT arrayPopFront([1, 2, 3]) AS res SELECT arrayPushBack(['a'], 'b') AS res ``` - ┌─res───────┐ - │ ['a','b'] │ - └───────────┘ +``` text +┌─res───────┐ +│ ['a','b'] │ +└───────────┘ +``` ## arrayPushFront {#arraypushfront} 将一个元素添加到数组的开头。 - arrayPushFront(array, single_value) +``` sql +arrayPushFront(array, single_value) +``` **参数** @@ -353,15 +543,19 @@ SELECT arrayPushBack(['a'], 'b') AS res SELECT arrayPushFront(['b'], 'a') AS res ``` - ┌─res───────┐ - │ ['a','b'] │ - └───────────┘ +``` text +┌─res───────┐ +│ ['a','b'] │ +└───────────┘ +``` ## arrayResize {#arrayresize} 更改数组的长度。 - arrayResize(array, size[, extender]) +``` sql +arrayResize(array, size[, extender]) +``` **参数:** @@ -377,23 +571,33 @@ SELECT arrayPushFront(['b'], 'a') AS res **调用示例** - SELECT arrayResize([1], 3) +``` sql +SELECT arrayResize([1], 3); +``` - ┌─arrayResize([1], 3)─┐ - │ [1,0,0] │ - └─────────────────────┘ +``` text +┌─arrayResize([1], 3)─┐ +│ [1,0,0] │ +└─────────────────────┘ +``` - SELECT arrayResize([1], 3, NULL) +``` sql +SELECT arrayResize([1], 3, NULL); +``` - ┌─arrayResize([1], 3, NULL)─┐ - │ [1,NULL,NULL] │ - └───────────────────────────┘ +``` text +┌─arrayResize([1], 3, NULL)─┐ +│ [1,NULL,NULL] │ +└───────────────────────────┘ +``` ## arraySlice {#arrayslice} 返回一个子数组,包含从指定位置的指定长度的元素。 - arraySlice(array, offset[, length]) +``` sql +arraySlice(array, offset[, length]) +``` **参数** @@ -407,9 +611,11 @@ SELECT arrayPushFront(['b'], 'a') AS res SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res ``` - ┌─res────────┐ - │ [2,NULL,4] │ - └────────────┘ +``` text +┌─res────────┐ +│ [2,NULL,4] │ +└────────────┘ +``` 设置为«NULL»的数组元素作为普通的数组元素值处理。 @@ -423,9 +629,11 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res SELECT arraySort([1, 3, 3, 0]); ``` - ┌─arraySort([1, 3, 3, 0])─┐ - │ [0,1,3,3] │ - └─────────────────────────┘ +``` text +┌─arraySort([1, 3, 3, 0])─┐ +│ [0,1,3,3] │ +└─────────────────────────┘ +``` 字符串排序示例: @@ -433,9 +641,11 @@ SELECT arraySort([1, 3, 3, 0]); SELECT arraySort(['hello', 'world', '!']); ``` - ┌─arraySort(['hello', 'world', '!'])─┐ - │ ['!','hello','world'] │ - └────────────────────────────────────┘ +``` text +┌─arraySort(['hello', 'world', '!'])─┐ +│ ['!','hello','world'] │ +└────────────────────────────────────┘ +``` `NULL`,`NaN`和`Inf`的排序顺序: @@ -443,9 +653,11 @@ SELECT arraySort(['hello', 'world', '!']); SELECT arraySort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]); ``` - ┌─arraySort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf])─┐ - │ [-inf,-4,1,2,3,inf,nan,nan,NULL,NULL] │ - └───────────────────────────────────────────────────────────┘ +``` text +┌─arraySort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf])─┐ +│ [-inf,-4,1,2,3,inf,nan,nan,NULL,NULL] │ +└───────────────────────────────────────────────────────────┘ +``` - `-Inf` 是数组中的第一个。 - `NULL` 是数组中的最后一个。 @@ -460,9 +672,11 @@ SELECT arraySort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]); SELECT arraySort((x) -> -x, [1, 2, 3]) as res; ``` - ┌─res─────┐ - │ [3,2,1] │ - └─────────┘ +``` text +┌─res─────┐ +│ [3,2,1] │ +└─────────┘ +``` 对于源数组的每个元素,lambda函数返回排序键,即\[1 -\> -1, 2 -\> -2, 3 -\> -3\]。由于`arraySort`函数按升序对键进行排序,因此结果为\[3,2,1\]。因此,`(x) -> -x` lambda函数将排序设置为[降序](#array_functions-reverse-sort)。 @@ -472,9 +686,11 @@ lambda函数可以接受多个参数。在这种情况下,您需要为`arraySo SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]) as res; ``` - ┌─res────────────────┐ - │ ['world', 'hello'] │ - └────────────────────┘ +``` text +┌─res────────────────┐ +│ ['world', 'hello'] │ +└────────────────────┘ +``` 这里,在第二个数组(\[2, 1\])中定义了第一个数组(\[‘hello’,‘world’\])的相应元素的排序键,即\[‘hello’ -\> 2,‘world’ -\> 1\]。 由于lambda函数中没有使用`x`,因此源数组中的实际值不会影响结果的顺序。所以,’world’将是结果中的第一个元素,’hello’将是结果中的第二个元素。 @@ -484,7 +700,7 @@ SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]) as res; SELECT arraySort((x, y) -> y, [0, 1, 2], ['c', 'b', 'a']) as res; ``` -``` sql +``` text ┌─res─────┐ │ [2,1,0] │ └─────────┘ @@ -494,7 +710,7 @@ SELECT arraySort((x, y) -> y, [0, 1, 2], ['c', 'b', 'a']) as res; SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; ``` -``` sql +``` text ┌─res─────┐ │ [2,1,0] │ └─────────┘ @@ -513,9 +729,11 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; SELECT arrayReverseSort([1, 3, 3, 0]); ``` - ┌─arrayReverseSort([1, 3, 3, 0])─┐ - │ [3,3,1,0] │ - └────────────────────────────────┘ +``` text +┌─arrayReverseSort([1, 3, 3, 0])─┐ +│ [3,3,1,0] │ +└────────────────────────────────┘ +``` 字符串排序示例: @@ -523,9 +741,11 @@ SELECT arrayReverseSort([1, 3, 3, 0]); SELECT arrayReverseSort(['hello', 'world', '!']); ``` - ┌─arrayReverseSort(['hello', 'world', '!'])─┐ - │ ['world','hello','!'] │ - └───────────────────────────────────────────┘ +``` text +┌─arrayReverseSort(['hello', 'world', '!'])─┐ +│ ['world','hello','!'] │ +└───────────────────────────────────────────┘ +``` `NULL`,`NaN`和`Inf`的排序顺序: @@ -533,7 +753,7 @@ SELECT arrayReverseSort(['hello', 'world', '!']); SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]) as res; ``` -``` sql +``` text ┌─res───────────────────────────────────┐ │ [inf,3,2,1,-4,-inf,nan,nan,NULL,NULL] │ └───────────────────────────────────────┘ @@ -550,11 +770,12 @@ SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]) as res; SELECT arrayReverseSort((x) -> -x, [1, 2, 3]) as res; ``` - ┌─res─────┐ - │ [1,2,3] │ - └─────────┘ +``` text +┌─res─────┐ +│ [1,2,3] │ +└─────────┘ +``` -数组按以下方式排序: 数组按以下方式排序: 1. 首先,根据lambda函数的调用结果对源数组(\[1, 2, 3\])进行排序。 结果是\[3, 2, 1\]。 @@ -566,7 +787,7 @@ lambda函数可以接受多个参数。在这种情况下,您需要为`arrayRe SELECT arrayReverseSort((x, y) -> y, ['hello', 'world'], [2, 1]) as res; ``` -``` sql +``` text ┌─res───────────────┐ │ ['hello','world'] │ └───────────────────┘ @@ -583,7 +804,7 @@ SELECT arrayReverseSort((x, y) -> y, ['hello', 'world'], [2, 1]) as res; SELECT arrayReverseSort((x, y) -> y, [4, 3, 5], ['a', 'b', 'c']) AS res; ``` -``` sql +``` text ┌─res─────┐ │ [5,3,4] │ └─────────┘ @@ -593,7 +814,7 @@ SELECT arrayReverseSort((x, y) -> y, [4, 3, 5], ['a', 'b', 'c']) AS res; SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; ``` -``` sql +``` text ┌─res─────┐ │ [4,3,5] │ └─────────┘ @@ -610,35 +831,108 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; 一个特殊的功能。请参见[«ArrayJoin函数»](array-join.md#functions_arrayjoin)部分。 -## arrayDifference(arr) {#arraydifferencearr} +## arrayDifference {#arraydifference} -返回一个数组,其中包含所有相邻元素对之间的差值。例如: +计算相邻数组元素之间的差异。返回一个数组,其中第一个元素为 0,第二个是 `a[1] - a[0]` 之差等。结果数组中元素的类型由减法的类型推断规则确定(例如`UInt8` - `UInt8` = `Int16`)。 + +**语法** ``` sql -SELECT arrayDifference([1, 2, 3, 4]) +arrayDifference(array) ``` - ┌─arrayDifference([1, 2, 3, 4])─┐ - │ [0,1,1,1] │ - └───────────────────────────────┘ +**参数** -## arrayDistinct(arr) {#arraydistinctarr} +- `array` –类型为[数组](https://clickhouse.com/docs/en/data_types/array/)。 -返回一个包含所有数组中不同元素的数组。例如: +**返回值** + +返回相邻元素之间的差异数组。 + +类型: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/)。 + +**示例** + +查询语句: ``` sql -SELECT arrayDistinct([1, 2, 2, 3, 1]) +SELECT arrayDifference([1, 2, 3, 4]); ``` - ┌─arrayDistinct([1, 2, 2, 3, 1])─┐ - │ [1,2,3] │ - └────────────────────────────────┘ +结果: + +``` text +┌─arrayDifference([1, 2, 3, 4])─┐ +│ [0,1,1,1] │ +└───────────────────────────────┘ +``` + +由于结果类型为Int64导致的溢出示例: + +查询语句: + +``` sql +SELECT arrayDifference([0, 10000000000000000000]); +``` + +结果: + +``` text +┌─arrayDifference([0, 10000000000000000000])─┐ +│ [0,-8446744073709551616] │ +└────────────────────────────────────────────┘ +``` +## arrayDistinct {#arraydistinctarr} + +返回一个包含所有数组中不同元素的数组。 + +**语法** + +``` sql +arrayDistinct(array) +``` + +**参数** + +- `array` –类型为[数组](https://clickhouse.com/docs/en/data_types/array/)。 + +**返回值** + +返回一个包含不同元素的数组。 + +**示例** + +查询语句: + +``` sql +SELECT arrayDistinct([1, 2, 2, 3, 1]); +``` + +结果: + +``` text +┌─arrayDistinct([1, 2, 2, 3, 1])─┐ +│ [1,2,3] │ +└────────────────────────────────┘ +``` ## arrayEnumerateDense(arr) {#arrayenumeratedensearr} 返回与源数组大小相同的数组,指示每个元素首次出现在源数组中的位置。例如:arrayEnumerateDense(\[10,20,10,30\])= \[1,2,1,3\]。 -## arrayIntersect(arr) {#arrayintersectarr} +示例: + +``` sql +SELECT arrayEnumerateDense([10, 20, 10, 30]) +``` + +``` text +┌─arrayEnumerateDense([10, 20, 10, 30])─┐ +│ [1,2,1,3] │ +└───────────────────────────────────────┘ +``` + +## arrayIntersect(arr) {#array-functions-arrayintersect} 返回所有数组元素的交集。例如: @@ -648,18 +942,755 @@ SELECT arrayIntersect([1, 2], [1, 3], [1, 4]) AS intersect ``` - ┌─no_intersect─┬─intersect─┐ - │ [] │ [1] │ - └──────────────┴───────────┘ +``` text +┌─no_intersect─┬─intersect─┐ +│ [] │ [1] │ +└──────────────┴───────────┘ +``` -## arrayReduce(agg_func, arr1, …) {#arrayreduceagg-func-arr1} +## arrayReduce {#arrayreduce} -将聚合函数应用于数组并返回其结果。如果聚合函数具有多个参数,则此函数可应用于相同大小的多个数组。 +将聚合函数应用于数组元素并返回其结果。聚合函数的名称以单引号 `'max'`、`'sum'` 中的字符串形式传递。使用参数聚合函数时,参数在括号中的函数名称后指示“uniqUpTo(6)”。 -arrayReduce(‘agg_func’,arr1,…) - 将聚合函数`agg_func`应用于数组`arr1 ...`。如果传递了多个数组,则相应位置上的元素将作为多个参数传递给聚合函数。例如:SELECT arrayReduce(‘max’,\[1,2,3\])= 3 +**语法** -## arrayReverse(arr) {#arrayreversearr} +``` sql +arrayReduce(agg_func, arr1, arr2, ..., arrN) +``` -返回与源数组大小相同的数组,包含反转源数组的所有元素的结果。 +**参数** + +- `agg_func` — 聚合函数的名称,应该是一个常量[string](../../sql-reference/data-types/string.md)。 +- `arr` — 任意数量的[数组](../../sql-reference/data-types/array.md)类型列作为聚合函数的参数。 + +**返回值** + +**示例** + +查询语句: + +``` sql +SELECT arrayReduce('max', [1, 2, 3]); +``` + +结果: + +``` text +┌─arrayReduce('max', [1, 2, 3])─┐ +│ 3 │ +└───────────────────────────────┘ +``` + +如果聚合函数采用多个参数,则该函数必须应用于多个相同大小的数组。 + +查询语句: + +``` sql +SELECT arrayReduce('maxIf', [3, 5], [1, 0]); +``` + +结果: + +``` text +┌─arrayReduce('maxIf', [3, 5], [1, 0])─┐ +│ 3 │ +└──────────────────────────────────────┘ +``` + +带有参数聚合函数的示例: + +查询语句: + +``` sql +SELECT arrayReduce('uniqUpTo(3)', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); +``` + +结果: + +``` text +┌─arrayReduce('uniqUpTo(3)', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])─┐ +│ 4 │ +└─────────────────────────────────────────────────────────────┘ +``` + +## arrayReduceInRanges {#arrayreduceinranges} + +将聚合函数应用于给定范围内的数组元素,并返回一个包含与每个范围对应的结果的数组。该函数将返回与多个 `arrayReduce(agg_func, arraySlice(arr1, index, length), ...)` 相同的结果。 + +**语法** + +``` sql +arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN) +``` + +**参数** + +- `agg_func` — 聚合函数的名称,应该是一个常量[string](../../sql-reference/data-types/string.md)。 +- `ranges` — 要聚合的范围应该是[元组](../../sql-reference/data-types/tuple.md)为元素的[数组](../../sql-reference/data-types/array.md),其中包含每个索引和长度范围。 +- `arr` — 任意数量的[数组](../../sql-reference/data-types/array.md)类型列作为聚合函数的参数。 + +**返回值** + +- 包含指定范围内聚合函数结果的数组。 + +类型为: [数组](../../sql-reference/data-types/array.md). + +**示例** + +查询语句: + +``` sql +SELECT arrayReduceInRanges( + 'sum', + [(1, 5), (2, 3), (3, 4), (4, 4)], + [1000000, 200000, 30000, 4000, 500, 60, 7] +) AS res +``` + +结果: + +``` text +┌─res─────────────────────────┐ +│ [1234500,234000,34560,4567] │ +└─────────────────────────────┘ +``` + +## arrayReverse(arr) {#arrayreverse} + +返回一个与原始数组大小相同的数组,其中包含相反顺序的元素。 + +示例: + +``` sql +SELECT arrayReverse([1, 2, 3]) +``` + +``` text +┌─arrayReverse([1, 2, 3])─┐ +│ [3,2,1] │ +└─────────────────────────┘ +``` + +## reverse(arr) {#array-functions-reverse} + +与[“arrayReverse”](#arrayreverse)作用相同。 + +## arrayFlatten {#arrayflatten} + +将嵌套的数组展平。 + +函数: + +- 适用于任何深度的嵌套数组。 +- 不会更改已经展平的数组。 + +展平后的数组包含来自所有源数组的所有元素。 + +**语法** + +``` sql +flatten(array_of_arrays) +``` + +别名: `flatten`. + +**参数** + +- `array_of_arrays` — 嵌套[数组](../../sql-reference/data-types/array.md)。 例如:`[[1,2,3], [4,5]]`。 + +**示例** + +``` sql +SELECT flatten([[[1]], [[2], [3]]]); +``` + +``` text +┌─flatten(array(array([1]), array([2], [3])))─┐ +│ [1,2,3] │ +└─────────────────────────────────────────────┘ +``` + +## arrayCompact {#arraycompact} + +从数组中删除连续的重复元素。结果值的顺序由源数组中的顺序决定。 + +**语法** + +``` sql +arrayCompact(arr) +``` + +**参数** + +`arr` — 类型为[数组](../../sql-reference/data-types/array.md)。 + +**返回值** + +没有重复元素的数组。 + +类型为: `Array`。 + +**示例** + +查询语句: + +``` sql +SELECT arrayCompact([1, 1, nan, nan, 2, 3, 3, 3]); +``` + +结果: + +``` text +┌─arrayCompact([1, 1, nan, nan, 2, 3, 3, 3])─┐ +│ [1,nan,nan,2,3] │ +└────────────────────────────────────────────┘ +``` + +## arrayZip {#arrayzip} + +将多个数组组合成一个数组。结果数组包含按列出的参数顺序分组为元组的源数组的相应元素。 + +**语法** + +``` sql +arrayZip(arr1, arr2, ..., arrN) +``` + +**参数** + +- `arrN` — N个[数组](../../sql-reference/data-types/array.md)。 + +该函数可以采用任意数量的不同类型的数组。所有输入数组的大小必须相等。 + +**返回值** + +- 将源数组中的元素分组为[元组](../../sql-reference/data-types/tuple.md)的数组。元组中的数据类型与输入数组的类型相同,并且与传递数组的顺序相同。 + +类型为: [数组](../../sql-reference/data-types/array.md)。 + +**示例** + +查询语句: + +``` sql +SELECT arrayZip(['a', 'b', 'c'], [5, 2, 1]); +``` + +结果: + +``` text +┌─arrayZip(['a', 'b', 'c'], [5, 2, 1])─┐ +│ [('a',5),('b',2),('c',1)] │ +└──────────────────────────────────────┘ +``` + +## arrayAUC {#arrayauc} + +计算AUC (ROC曲线下的面积,这是机器学习中的一个概念,更多细节请查看:https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve)。 + +**语法** + +``` sql +arrayAUC(arr_scores, arr_labels) +``` + +**参数** + +- `arr_scores` — 分数预测模型给出。 +- `arr_labels` — 样本的标签,通常为 1 表示正样本,0 表示负样本。 + +**返回值** + +返回 Float64 类型的 AUC 值。 + +**示例** + +查询语句: + +``` sql +select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); +``` + +结果: + +``` text +┌─arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐ +│ 0.75 │ +└───────────────────────────────────────────────┘ +``` + +## arrayMap(func, arr1, …) {#array-map} + +将从 `func` 函数的原始应用中获得的数组返回给 `arr` 数组中的每个元素。 + +示例: + +``` sql +SELECT arrayMap(x -> (x + 2), [1, 2, 3]) as res; +``` + +``` text +┌─res─────┐ +│ [3,4,5] │ +└─────────┘ +``` + +下面的例子展示了如何从不同的数组创建一个元素的元组: + +``` sql +SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res +``` + +``` text +┌─res─────────────────┐ +│ [(1,4),(2,5),(3,6)] │ +└─────────────────────┘ +``` + +请注意,`arrayMap` 是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arrayFilter(func, arr1, …) {#array-filter} + +返回一个仅包含 `arr1` 中的元素的数组,其中 `func` 返回的值不是 0。 + +示例: + +``` sql +SELECT arrayFilter(x -> x LIKE '%World%', ['Hello', 'abc World']) AS res +``` + +``` text +┌─res───────────┐ +│ ['abc World'] │ +└───────────────┘ +``` + +``` sql +SELECT + arrayFilter( + (i, x) -> x LIKE '%World%', + arrayEnumerate(arr), + ['Hello', 'abc World'] AS arr) + AS res +``` + +``` text +┌─res─┐ +│ [2] │ +└─────┘ +``` + +请注意,`arrayFilter`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arrayFill(func, arr1, …) {#array-fill} + +从第一个元素到最后一个元素扫描`arr1`,如果`func`返回0,则用`arr1[i - 1]`替换`arr1[i]`。`arr1`的第一个元素不会被替换。 + +示例: + +``` sql +SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]) AS res +``` + +``` text +┌─res──────────────────────────────┐ +│ [1,1,3,11,12,12,12,5,6,14,14,14] │ +└──────────────────────────────────┘ +``` + +请注意,`arrayFill` 是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arrayReverseFill(func, arr1, …) {#array-reverse-fill} + +从最后一个元素到第一个元素扫描`arr1`,如果`func`返回0,则用`arr1[i + 1]`替换`arr1[i]`。`arr1`的最后一个元素不会被替换。 + +示例: + +``` sql +SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]) AS res +``` + +``` text +┌─res────────────────────────────────┐ +│ [1,3,3,11,12,5,5,5,6,14,NULL,NULL] │ +└────────────────────────────────────┘ +``` + +请注意,`arrayReverseFill`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arraySplit(func, arr1, …) {#array-split} + +将 `arr1` 拆分为多个数组。当 `func` 返回 0 以外的值时,数组将在元素的左侧拆分。数组不会在第一个元素之前被拆分。 + +示例: + +``` sql +SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res +``` + +``` text +┌─res─────────────┐ +│ [[1,2,3],[4,5]] │ +└─────────────────┘ +``` + +请注意,`arraySplit`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arrayReverseSplit(func, arr1, …) {#array-reverse-split} + +将 `arr1` 拆分为多个数组。当 `func` 返回 0 以外的值时,数组将在元素的右侧拆分。数组不会在最后一个元素之后被拆分。 + +示例: + +``` sql +SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res +``` + +``` text +┌─res───────────────┐ +│ [[1],[2,3,4],[5]] │ +└───────────────────┘ +``` + +请注意,`arrayReverseSplit`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} + +如果 `arr` 中至少有一个元素 `func` 返回 0 以外的值,则返回 1。否则,它返回 0。 + +请注意,`arrayExists`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} + +如果 `func` 为 `arr` 中的所有元素返回 0 以外的值,则返回 1。否则,它返回 0。 + +请注意,`arrayAll`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arrayFirst(func, arr1, …) {#array-first} + +返回 `arr1` 数组中 `func` 返回非 0 的值的第一个元素。 + +请注意,`arrayFirst`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arrayLast(func, arr1, …) {#array-last} + +返回 `arr1` 数组中的最后一个元素,其中 `func` 返回的值不是 0。 + +请注意,`arrayLast`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arrayFirstIndex(func, arr1, …) {#array-first-index} + +返回 `arr1` 数组中第一个元素的索引,其中 `func` 返回的值不是 0。 + +请注意,`arrayFirstIndex`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arrayLastIndex(func, arr1, …) {#array-last-index} + +返回 `arr1` 数组中最后一个元素的索引,其中 `func` 返回的值不是 0。 + +请注意,`arrayLastIndex`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arrayMin {#array-min} + +返回源数组中的最小元素。 + +如果指定了 `func` 函数,则返回此函数转换的元素的最小值。 + +请注意,`arrayMin`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +**语法** + +```sql +arrayMin([func,] arr) +``` + +**参数** + +- `func` — 类型为[表达式](../../sql-reference/data-types/special-data-types/expression.md)。 +- `arr` — 类型为[数组](../../sql-reference/data-types/array.md)。 + +**返回值** + +- 函数值的最小值(或数组最小值)。 + +类型:如果指定了`func`,则匹配`func`返回值类型,否则匹配数组元素类型。 + +**示例** + +查询语句: + +```sql +SELECT arrayMin([1, 2, 4]) AS res; +``` + +结果: + +```text +┌─res─┐ +│ 1 │ +└─────┘ +``` + +查询语句: + +```sql +SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res; +``` + +结果: + +```text +┌─res─┐ +│ -4 │ +└─────┘ +``` + +## arrayMax {#array-max} + +返回源数组中元素的最大值。 + +如果指定了`func` 函数,则返回此函数转换的元素的最大值。 + +请注意,`arrayMax`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions). 您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +**语法** + +```sql +arrayMax([func,] arr) +``` + +**参数** + +- `func` — 类型为[表达式](../../sql-reference/data-types/special-data-types/expression.md)。 +- `arr` — 类型为[数组](../../sql-reference/data-types/array.md)。 + +**返回值** + +- 函数值的最大值(或数组最大值)。 + +类型:如果指定了`func`,则匹配`func`返回值类型,否则匹配数组元素类型。 + +**示例** + +查询语句: + +```sql +SELECT arrayMax([1, 2, 4]) AS res; +``` + +结果: + +```text +┌─res─┐ +│ 4 │ +└─────┘ +``` + +查询语句: + +```sql +SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res; +``` + +结果: + +```text +┌─res─┐ +│ -1 │ +└─────┘ +``` + +## arraySum {#array-sum} + +返回源数组中元素的总和。 + +如果指定了 `func` 函数,则返回此函数转换的元素的总和。 + +请注意,`arraySum`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions). 您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +**语法** + +```sql +arraySum([func,] arr) +``` + +**参数** + +- `func` — 类型为[表达式](../../sql-reference/data-types/special-data-types/expression.md)。 +- `arr` — 类型为[数组](../../sql-reference/data-types/array.md)。 + +**返回值** + +- 函数值的总和(或数组总和)。 + +类型:源数组中的十进制数(或转换后的值,如果指定了`func`)-[Decimal128](../../sql-reference/data-types/decimal.md), 对于浮点数 — [Float64](../../sql-reference/data-types/float.md), 对于无符号数 — [UInt64](../../sql-reference/data-types/int-uint.md), 对于有符号数 — [Int64](../../sql-reference/data-types/int-uint.md)。 + +**示例** + +查询语句: + +```sql +SELECT arraySum([2, 3]) AS res; +``` + +结果: + +```text +┌─res─┐ +│ 5 │ +└─────┘ +``` + +查询语句: + +```sql +SELECT arraySum(x -> x*x, [2, 3]) AS res; +``` + +结果: + +```text +┌─res─┐ +│ 13 │ +└─────┘ +``` + +## arrayAvg {#array-avg} + +返回源数组中元素的平均值。 + +如果指定了 func 函数,则返回此函数转换的元素的平均值。 + +请注意,`arrayAvg`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions). 您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +**语法** + +```sql +arrayAvg([func,] arr) +``` + +**参数** + +- `func` — 类型为[表达式](../../sql-reference/data-types/special-data-types/expression.md)。 +- `arr` — 类型为[数组](../../sql-reference/data-types/array.md)。 + +**返回值** + +- 函数值的平均值(或数组平均值)。 + +类型为: [Float64](../../sql-reference/data-types/float.md). + +**示例** + +查询语句: + +```sql +SELECT arrayAvg([1, 2, 4]) AS res; +``` + +结果: + +```text +┌────────────────res─┐ +│ 2.3333333333333335 │ +└────────────────────┘ +``` + +查询语句: + +```sql +SELECT arrayAvg(x -> (x * x), [2, 4]) AS res; +``` + +结果: + +```text +┌─res─┐ +│ 10 │ +└─────┘ +``` + +## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} + +返回源数组中元素的部分和的数组(运行总和)。如果指定了 func 函数,则数组元素的值在求和之前由该函数转换。 + +示例: + +``` sql +SELECT arrayCumSum([1, 1, 1, 1]) AS res +``` + +``` text +┌─res──────────┐ +│ [1, 2, 3, 4] │ +└──────────────┘ +``` + +请注意,`arrayCumSum`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions). 您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arrayCumSumNonNegative(arr) {#arraycumsumnonnegativearr} + +与 `arrayCumSum` 相同,返回源数组中元素的部分和的数组(运行总和)。不同的`arrayCumSum`,当返回值包含小于零的值时,将该值替换为零,并以零参数执行后续计算。例如: + +``` sql +SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res +``` + +``` text +┌─res───────┐ +│ [1,2,0,1] │ +└───────────┘ +``` +请注意`arraySumNonNegative`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions). 您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 + +## arrayProduct {#arrayproduct} + +将一个[数组](../../sql-reference/data-types/array.md)中的元素相乘。 + +**语法** + +``` sql +arrayProduct(arr) +``` + +**参数** + +- `arr` — 数值类型的[数组](../../sql-reference/data-types/array.md)。 + +**返回值** + +- 数组元素的乘积。 + +类型为: [Float64](../../sql-reference/data-types/float.md). + +**示例** + +查询语句: + +``` sql +SELECT arrayProduct([1,2,3,4,5,6]) as res; +``` + +结果: + +``` text +┌─res───┐ +│ 720 │ +└───────┘ +``` + +查询语句: + +``` sql +SELECT arrayProduct([toDecimal64(1,8), toDecimal64(2,8), toDecimal64(3,8)]) as res, toTypeName(res); +``` + +返回值类型总是[Float64](../../sql-reference/data-types/float.md). 结果: + +``` text +┌─res─┬─toTypeName(arrayProduct(array(toDecimal64(1, 8), toDecimal64(2, 8), toDecimal64(3, 8))))─┐ +│ 6 │ Float64 │ +└─────┴──────────────────────────────────────────────────────────────────────────────────────────┘ +``` [来源文章](https://clickhouse.com/docs/en/query_language/functions/array_functions/) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index fa1664b120a..3a6dfb161a2 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -109,7 +109,7 @@ std::vector Client::loadWarningMessages() connection->sendQuery(connection_parameters.timeouts, "SELECT message FROM system.warnings", "" /* query_id */, QueryProcessingStage::Complete, &global_context->getSettingsRef(), - &global_context->getClientInfo(), false); + &global_context->getClientInfo(), false, {}); while (true) { Packet packet = connection->receivePacket(); diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index fe52e3a825e..1d30b541ca0 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -68,6 +68,7 @@ namespace ErrorCodes extern const int NOT_ENOUGH_SPACE; extern const int NOT_IMPLEMENTED; extern const int CANNOT_KILL; + extern const int BAD_ARGUMENTS; } } @@ -1062,8 +1063,11 @@ namespace return pid; } - int stop(const fs::path & pid_file, bool force) + int stop(const fs::path & pid_file, bool force, bool do_not_kill) { + if (force && do_not_kill) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified flags are incompatible"); + UInt64 pid = isRunning(pid_file); if (!pid) @@ -1092,9 +1096,15 @@ namespace if (try_num == num_tries) { - fmt::print("Will terminate forcefully.\n", pid); + if (do_not_kill) + { + fmt::print("Process (pid = {}) is still running. Will not try to kill it.\n", pid); + return 1; + } + + fmt::print("Will terminate forcefully (pid = {}).\n", pid); if (0 == kill(pid, 9)) - fmt::print("Sent kill signal.\n", pid); + fmt::print("Sent kill signal (pid = {}).\n", pid); else throwFromErrno("Cannot send kill signal", ErrorCodes::SYSTEM_ERROR); @@ -1175,6 +1185,7 @@ int mainEntryClickHouseStop(int argc, char ** argv) ("prefix", po::value()->default_value("/"), "prefix for all paths") ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") ("force", po::bool_switch(), "Stop with KILL signal instead of TERM") + ("do-not-kill", po::bool_switch(), "Do not send KILL even if TERM did not help") ; po::variables_map options; @@ -1189,7 +1200,9 @@ int mainEntryClickHouseStop(int argc, char ** argv) fs::path prefix = options["prefix"].as(); fs::path pid_file = prefix / options["pid-path"].as() / "clickhouse-server.pid"; - return stop(pid_file, options["force"].as()); + bool force = options["force"].as(); + bool do_not_kill = options["do-not-kill"].as(); + return stop(pid_file, force, do_not_kill); } catch (...) { @@ -1247,6 +1260,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv) ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user") ("force", po::value()->default_value(false), "Stop with KILL signal instead of TERM") + ("do-not-kill", po::bool_switch(), "Do not send KILL even if TERM did not help") ; po::variables_map options; @@ -1265,7 +1279,9 @@ int mainEntryClickHouseRestart(int argc, char ** argv) fs::path config = prefix / options["config-path"].as() / "config.xml"; fs::path pid_file = prefix / options["pid-path"].as() / "clickhouse-server.pid"; - if (int res = stop(pid_file, options["force"].as())) + bool force = options["force"].as(); + bool do_not_kill = options["do-not-kill"].as(); + if (int res = stop(pid_file, force, do_not_kill)) return res; return start(user, executable, config, pid_file); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 02c985b6efb..2b1a0809143 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -540,7 +540,7 @@ static void sanityChecks(Server & server) try { if (readString("/sys/devices/system/clocksource/clocksource0/current_clocksource").find("tsc") == std::string::npos) - server.context()->addWarningMessage("Linux is not using fast TSC clock source. Performance can be degraded."); + server.context()->addWarningMessage("Linux is not using a fast TSC clock source. Performance can be degraded."); } catch (...) { @@ -558,7 +558,7 @@ static void sanityChecks(Server & server) try { if (readString("/sys/kernel/mm/transparent_hugepage/enabled").find("[always]") != std::string::npos) - server.context()->addWarningMessage("Linux transparent hugepage are set to \"always\"."); + server.context()->addWarningMessage("Linux transparent hugepages are set to \"always\"."); } catch (...) { @@ -1088,11 +1088,8 @@ int Server::main(const std::vector & /*args*/) total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker(); - if (config->has("global_memory_usage_overcommit_max_wait_microseconds")) - { - UInt64 max_overcommit_wait_time = config->getUInt64("global_memory_usage_overcommit_max_wait_microseconds", 0); - global_overcommit_tracker->setMaxWaitTime(max_overcommit_wait_time); - } + UInt64 max_overcommit_wait_time = config->getUInt64("global_memory_usage_overcommit_max_wait_microseconds", 200); + global_overcommit_tracker->setMaxWaitTime(max_overcommit_wait_time); total_memory_tracker.setOvercommitTracker(global_overcommit_tracker); // FIXME logging-related things need synchronization -- see the 'Logger * log' saved @@ -1294,17 +1291,11 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Listening for {}", server.getDescription()); } - auto & access_control = global_context->getAccessControl(); - if (config().has("custom_settings_prefixes")) - access_control.setCustomSettingsPrefixes(config().getString("custom_settings_prefixes")); - - access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true)); - access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true)); - /// Initialize access storages. + auto & access_control = global_context->getAccessControl(); try { - access_control.addStoragesFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); }); + access_control.setUpFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); }); } catch (...) { diff --git a/programs/server/config.xml b/programs/server/config.xml index f5f22d1fcdf..bd54051be19 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -545,6 +545,14 @@ --> + + + false + + default diff --git a/programs/server/play.html b/programs/server/play.html index 06fc5d8de9a..a36cebb5b02 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -129,8 +129,8 @@ #query_div { - /* Make enough space for even huge queries. */ - height: 20%; + /* Make enough space for medium/large queries but allowing query textarea to grow. */ + min-height: 20%; } #query diff --git a/programs/server/users.xml b/programs/server/users.xml index f18562071d8..96067d01a6b 100644 --- a/programs/server/users.xml +++ b/programs/server/users.xml @@ -6,9 +6,6 @@ - - 10000000000 - + + + + 1 + + + + 1 + + + + 1 + + + + + + diff --git a/tests/integration/test_disabled_access_control_improvements/configs/config.d/disable_access_control_improvements.xml b/tests/integration/test_disabled_access_control_improvements/configs/config.d/disable_access_control_improvements.xml new file mode 100644 index 00000000000..0192e211b68 --- /dev/null +++ b/tests/integration/test_disabled_access_control_improvements/configs/config.d/disable_access_control_improvements.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/tests/integration/test_disabled_access_control_improvements/configs/users.d/another_user.xml b/tests/integration/test_disabled_access_control_improvements/configs/users.d/another_user.xml new file mode 100644 index 00000000000..19249011968 --- /dev/null +++ b/tests/integration/test_disabled_access_control_improvements/configs/users.d/another_user.xml @@ -0,0 +1,18 @@ + + + + + 0 + + + + + + + ::/0 + + default + default + + + diff --git a/tests/integration/test_disabled_access_control_improvements/configs/users.d/row_policy.xml b/tests/integration/test_disabled_access_control_improvements/configs/users.d/row_policy.xml new file mode 120000 index 00000000000..593f0e42662 --- /dev/null +++ b/tests/integration/test_disabled_access_control_improvements/configs/users.d/row_policy.xml @@ -0,0 +1 @@ +../../normal_filters.xml \ No newline at end of file diff --git a/tests/integration/test_disabled_access_control_improvements/configs/users.xml b/tests/integration/test_disabled_access_control_improvements/configs/users.xml new file mode 100644 index 00000000000..5dc65384a2d --- /dev/null +++ b/tests/integration/test_disabled_access_control_improvements/configs/users.xml @@ -0,0 +1,24 @@ + + + + + + + ::/0 + + default + default + + + + + ::/0 + + default + default + + + + + + diff --git a/tests/integration/test_disabled_access_control_improvements/no_filters.xml b/tests/integration/test_disabled_access_control_improvements/no_filters.xml new file mode 100644 index 00000000000..fddc67fc3f9 --- /dev/null +++ b/tests/integration/test_disabled_access_control_improvements/no_filters.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/integration/test_disabled_access_control_improvements/no_rows.xml b/tests/integration/test_disabled_access_control_improvements/no_rows.xml new file mode 100644 index 00000000000..760203210b2 --- /dev/null +++ b/tests/integration/test_disabled_access_control_improvements/no_rows.xml @@ -0,0 +1,23 @@ + + + + + + + + + NULL + + + + NULL + + + + NULL + + + + + + diff --git a/tests/integration/test_disabled_access_control_improvements/normal_filters.xml b/tests/integration/test_disabled_access_control_improvements/normal_filters.xml new file mode 100644 index 00000000000..6f5cd8701f6 --- /dev/null +++ b/tests/integration/test_disabled_access_control_improvements/normal_filters.xml @@ -0,0 +1,36 @@ + + + + + + + + + + a = 1 + + + + + a + b < 1 or c - d > 5 + + + + + c = 1 + + + + + + + + + + a = 1 + + + + + + diff --git a/tests/integration/test_disabled_access_control_improvements/test_row_policy.py b/tests/integration/test_disabled_access_control_improvements/test_row_policy.py new file mode 100644 index 00000000000..509b4de1a37 --- /dev/null +++ b/tests/integration/test_disabled_access_control_improvements/test_row_policy.py @@ -0,0 +1,418 @@ +import os +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/config.d/disable_access_control_improvements.xml"], + user_configs=[ + "configs/users.d/row_policy.xml", + "configs/users.d/another_user.xml", + ], +) + + +def copy_policy_xml(local_file_name): + script_dir = os.path.dirname(os.path.realpath(__file__)) + node.copy_file_to_container( + os.path.join(script_dir, local_file_name), + "/etc/clickhouse-server/users.d/row_policy.xml", + ) + node.query("SYSTEM RELOAD CONFIG") + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + + node.query( + """ + CREATE DATABASE mydb; + + CREATE TABLE mydb.filtered_table1 (a UInt8, b UInt8) ENGINE MergeTree ORDER BY a; + INSERT INTO mydb.filtered_table1 values (0, 0), (0, 1), (1, 0), (1, 1); + + CREATE TABLE mydb.table (a UInt8, b UInt8) ENGINE MergeTree ORDER BY a; + INSERT INTO mydb.table values (0, 0), (0, 1), (1, 0), (1, 1); + + CREATE TABLE mydb.filtered_table2 (a UInt8, b UInt8, c UInt8, d UInt8) ENGINE MergeTree ORDER BY a; + INSERT INTO mydb.filtered_table2 values (0, 0, 0, 0), (1, 2, 3, 4), (4, 3, 2, 1), (0, 0, 6, 0); + + CREATE TABLE mydb.filtered_table3 (a UInt8, b UInt8, c UInt16 ALIAS a + b) ENGINE MergeTree ORDER BY a; + INSERT INTO mydb.filtered_table3 values (0, 0), (0, 1), (1, 0), (1, 1); + + CREATE TABLE mydb.`.filtered_table4` (a UInt8, b UInt8, c UInt16 ALIAS a + b) ENGINE MergeTree ORDER BY a; + INSERT INTO mydb.`.filtered_table4` values (0, 0), (0, 1), (1, 0), (1, 1); + + CREATE TABLE mydb.local (a UInt8, b UInt8) ENGINE MergeTree ORDER BY a; + """ + ) + + node.query("INSERT INTO mydb.local values (2, 0), (2, 1), (1, 0), (1, 1)") + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def reset_policies(): + try: + yield + finally: + copy_policy_xml("normal_filters.xml") + node.query("DROP POLICY IF EXISTS pA, pB ON mydb.filtered_table1") + + +def test_introspection(): + policies = [ + [ + "another ON mydb.filtered_table1", + "another", + "mydb", + "filtered_table1", + "6068883a-0e9d-f802-7e22-0144f8e66d3c", + "users.xml", + "1", + 0, + 0, + "['another']", + "[]", + ], + [ + "another ON mydb.filtered_table2", + "another", + "mydb", + "filtered_table2", + "c019e957-c60b-d54e-cc52-7c90dac5fb01", + "users.xml", + "1", + 0, + 0, + "['another']", + "[]", + ], + [ + "another ON mydb.filtered_table3", + "another", + "mydb", + "filtered_table3", + "4cb080d0-44e8-dbef-6026-346655143628", + "users.xml", + "1", + 0, + 0, + "['another']", + "[]", + ], + [ + "another ON mydb.local", + "another", + "mydb", + "local", + "5b23c389-7e18-06bf-a6bc-dd1afbbc0a97", + "users.xml", + "a = 1", + 0, + 0, + "['another']", + "[]", + ], + [ + "default ON mydb.filtered_table1", + "default", + "mydb", + "filtered_table1", + "9e8a8f62-4965-2b5e-8599-57c7b99b3549", + "users.xml", + "a = 1", + 0, + 0, + "['default']", + "[]", + ], + [ + "default ON mydb.filtered_table2", + "default", + "mydb", + "filtered_table2", + "cffae79d-b9bf-a2ef-b798-019c18470b25", + "users.xml", + "a + b < 1 or c - d > 5", + 0, + 0, + "['default']", + "[]", + ], + [ + "default ON mydb.filtered_table3", + "default", + "mydb", + "filtered_table3", + "12fc5cef-e3da-3940-ec79-d8be3911f42b", + "users.xml", + "c = 1", + 0, + 0, + "['default']", + "[]", + ], + [ + "default ON mydb.local", + "default", + "mydb", + "local", + "cdacaeb5-1d97-f99d-2bb0-4574f290629c", + "users.xml", + "1", + 0, + 0, + "['default']", + "[]", + ], + ] + assert node.query( + "SELECT * from system.row_policies ORDER BY short_name, database, table" + ) == TSV(policies) + + +def test_dcl_introspection(): + assert node.query("SHOW POLICIES") == TSV( + [ + "another ON mydb.filtered_table1", + "another ON mydb.filtered_table2", + "another ON mydb.filtered_table3", + "another ON mydb.local", + "default ON mydb.filtered_table1", + "default ON mydb.filtered_table2", + "default ON mydb.filtered_table3", + "default ON mydb.local", + ] + ) + + assert node.query("SHOW POLICIES ON mydb.filtered_table1") == TSV( + ["another", "default"] + ) + assert node.query("SHOW POLICIES ON mydb.local") == TSV(["another", "default"]) + assert node.query("SHOW POLICIES ON mydb.*") == TSV( + [ + "another ON mydb.filtered_table1", + "another ON mydb.filtered_table2", + "another ON mydb.filtered_table3", + "another ON mydb.local", + "default ON mydb.filtered_table1", + "default ON mydb.filtered_table2", + "default ON mydb.filtered_table3", + "default ON mydb.local", + ] + ) + assert node.query("SHOW POLICIES default") == TSV( + [ + "default ON mydb.filtered_table1", + "default ON mydb.filtered_table2", + "default ON mydb.filtered_table3", + "default ON mydb.local", + ] + ) + + assert ( + node.query("SHOW CREATE POLICY default ON mydb.filtered_table1") + == "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default\n" + ) + assert ( + node.query("SHOW CREATE POLICY default ON mydb.filtered_table2") + == "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default\n" + ) + assert ( + node.query("SHOW CREATE POLICY default ON mydb.filtered_table3") + == "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default\n" + ) + assert ( + node.query("SHOW CREATE POLICY default ON mydb.local") + == "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default\n" + ) + + assert node.query("SHOW CREATE POLICY default") == TSV( + [ + "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default", + "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default", + "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default", + "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default", + ] + ) + assert node.query("SHOW CREATE POLICIES ON mydb.filtered_table1") == TSV( + [ + "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another", + "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default", + ] + ) + assert node.query("SHOW CREATE POLICIES ON mydb.*") == TSV( + [ + "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another", + "CREATE ROW POLICY another ON mydb.filtered_table2 FOR SELECT USING 1 TO another", + "CREATE ROW POLICY another ON mydb.filtered_table3 FOR SELECT USING 1 TO another", + "CREATE ROW POLICY another ON mydb.local FOR SELECT USING a = 1 TO another", + "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default", + "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default", + "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default", + "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default", + ] + ) + assert node.query("SHOW CREATE POLICIES") == TSV( + [ + "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another", + "CREATE ROW POLICY another ON mydb.filtered_table2 FOR SELECT USING 1 TO another", + "CREATE ROW POLICY another ON mydb.filtered_table3 FOR SELECT USING 1 TO another", + "CREATE ROW POLICY another ON mydb.local FOR SELECT USING a = 1 TO another", + "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default", + "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default", + "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default", + "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default", + ] + ) + + expected_access = ( + "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another\n" + "CREATE ROW POLICY another ON mydb.filtered_table2 FOR SELECT USING 1 TO another\n" + "CREATE ROW POLICY another ON mydb.filtered_table3 FOR SELECT USING 1 TO another\n" + "CREATE ROW POLICY another ON mydb.local FOR SELECT USING a = 1 TO another\n" + "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default\n" + "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default\n" + "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default\n" + "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default\n" + ) + assert expected_access in node.query("SHOW ACCESS") + + copy_policy_xml("all_rows.xml") + assert node.query("SHOW POLICIES") == TSV( + [ + "another ON mydb.filtered_table1", + "another ON mydb.filtered_table2", + "another ON mydb.filtered_table3", + "default ON mydb.filtered_table1", + "default ON mydb.filtered_table2", + "default ON mydb.filtered_table3", + ] + ) + assert ( + node.query("SHOW CREATE POLICY default ON mydb.filtered_table1") + == "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING 1 TO default\n" + ) + assert ( + node.query("SHOW CREATE POLICY default ON mydb.filtered_table2") + == "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING 1 TO default\n" + ) + assert ( + node.query("SHOW CREATE POLICY default ON mydb.filtered_table3") + == "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING 1 TO default\n" + ) + + copy_policy_xml("no_rows.xml") + assert node.query("SHOW POLICIES") == TSV( + [ + "another ON mydb.filtered_table1", + "another ON mydb.filtered_table2", + "another ON mydb.filtered_table3", + "default ON mydb.filtered_table1", + "default ON mydb.filtered_table2", + "default ON mydb.filtered_table3", + ] + ) + assert ( + node.query("SHOW CREATE POLICY default ON mydb.filtered_table1") + == "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING NULL TO default\n" + ) + assert ( + node.query("SHOW CREATE POLICY default ON mydb.filtered_table2") + == "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING NULL TO default\n" + ) + assert ( + node.query("SHOW CREATE POLICY default ON mydb.filtered_table3") + == "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING NULL TO default\n" + ) + + copy_policy_xml("no_filters.xml") + assert node.query("SHOW POLICIES") == "" + + +def test_dcl_management(): + copy_policy_xml("no_filters.xml") + assert node.query("SHOW POLICIES") == "" + + node.query("CREATE POLICY pA ON mydb.filtered_table1 FOR SELECT USING ab") + assert node.query("SELECT * FROM mydb.filtered_table1") == TSV([[1, 0]]) + + node.query("ALTER POLICY pA ON mydb.filtered_table1 RENAME TO pB") + assert node.query("SELECT * FROM mydb.filtered_table1") == TSV([[1, 0]]) + assert node.query("SHOW POLICIES ON mydb.filtered_table1") == "pB\n" + assert ( + node.query("SHOW CREATE POLICY pB ON mydb.filtered_table1") + == "CREATE ROW POLICY pB ON mydb.filtered_table1 FOR SELECT USING a > b TO default\n" + ) + + node.query("DROP POLICY pB ON mydb.filtered_table1") + assert node.query("SELECT * FROM mydb.filtered_table1") == TSV( + [[0, 0], [0, 1], [1, 0], [1, 1]] + ) + assert node.query("SHOW POLICIES") == "" + + +def test_dcl_users_with_policies_from_users_xml(): + node.query("CREATE USER X") + node.query("GRANT SELECT ON mydb.filtered_table1 TO X") + assert node.query("SELECT * FROM mydb.filtered_table1") == TSV([[1, 0], [1, 1]]) + assert node.query("SELECT * FROM mydb.filtered_table1", user="X") == "" + node.query("DROP USER X") + + +def test_some_users_without_policies(): + copy_policy_xml("no_filters.xml") + assert node.query("SHOW POLICIES") == "" + node.query("CREATE USER X, Y") + node.query("GRANT SELECT ON mydb.filtered_table1 TO X, Y") + + # permissive a >= b for X, none for Y + node.query( + "CREATE POLICY pA ON mydb.filtered_table1 FOR SELECT USING a >= b AS permissive TO X" + ) + assert node.query("SELECT * FROM mydb.filtered_table1", user="X") == TSV( + [[0, 0], [1, 0], [1, 1]] + ) + assert node.query("SELECT * FROM mydb.filtered_table1", user="Y") == "" + + # restrictive a >=b for X, none for Y + node.query("ALTER POLICY pA ON mydb.filtered_table1 AS restrictive") + assert node.query("SELECT * FROM mydb.filtered_table1", user="X") == "" + assert node.query("SELECT * FROM mydb.filtered_table1", user="Y") == "" + + # permissive a >= b for X, restrictive a <= b for X, none for Y + node.query("ALTER POLICY pA ON mydb.filtered_table1 AS permissive") + node.query( + "CREATE POLICY pB ON mydb.filtered_table1 FOR SELECT USING a <= b AS restrictive TO X" + ) + assert node.query("SELECT * FROM mydb.filtered_table1", user="X") == TSV( + [[0, 0], [1, 1]] + ) + assert node.query("SELECT * FROM mydb.filtered_table1", user="Y") == "" + + # permissive a >= b for X, restrictive a <= b for Y + node.query("ALTER POLICY pB ON mydb.filtered_table1 TO Y") + assert node.query("SELECT * FROM mydb.filtered_table1", user="X") == TSV( + [[0, 0], [1, 0], [1, 1]] + ) + assert node.query("SELECT * FROM mydb.filtered_table1", user="Y") == "" + + node.query("DROP POLICY pA, pB ON mydb.filtered_table1") + node.query("DROP USER X, Y") diff --git a/tests/integration/test_old_parts_finally_removed/__init__.py b/tests/integration/test_old_parts_finally_removed/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_old_parts_finally_removed/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_old_parts_finally_removed/test.py b/tests/integration/test_old_parts_finally_removed/test.py new file mode 100644 index 00000000000..108b72c5ccd --- /dev/null +++ b/tests/integration/test_old_parts_finally_removed/test.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 + +import os +import time + +import helpers.client as client +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance("node1", with_zookeeper=True) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + + +def test_part_finally_removed(started_cluster): + node1.query( + "CREATE TABLE drop_outdated_part (Key UInt64) ENGINE = ReplicatedMergeTree('/table/d', '1') ORDER BY tuple() SETTINGS old_parts_lifetime=10, cleanup_delay_period=10, cleanup_delay_period_random_add=1" + ) + node1.query("INSERT INTO drop_outdated_part VALUES (1)") + + node1.query("OPTIMIZE TABLE drop_outdated_part FINAL") + + node1.exec_in_container( + [ + "bash", + "-c", + "chown -R root:root /var/lib/clickhouse/data/default/drop_outdated_part/all_0_0_0", + ], + privileged=True, + user="root", + ) + + node1.query( + "ALTER TABLE drop_outdated_part MODIFY SETTING old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1" + ) + + for i in range(60): + if node1.contains_in_log("Cannot quickly remove directory"): + break + time.sleep(0.5) + + node1.exec_in_container( + [ + "bash", + "-c", + f"chown -R {os.getuid()}:{os.getgid()} /var/lib/clickhouse/data/default/drop_outdated_part/*", + ], + privileged=True, + user="root", + ) + + for i in range(60): + + if ( + node1.query( + "SELECT count() from system.parts WHERE table = 'drop_outdated_part'" + ) + == "1\n" + ): + out = node1.exec_in_container( + [ + "bash", + "-c", + "ls /var/lib/clickhouse/data/default/drop_outdated_part | grep '_' | grep 'all'", + ], + privileged=True, + user="root", + ) + + assert ( + "all_0_0_0" not in out + ), f"Print found part which must be deleted: {out}" + + break + time.sleep(0.5) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 02301236389..92c109974e1 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -825,6 +825,9 @@ def test_sync_replica(started_cluster): settings=settings, ) + # wait for host to reconnect + dummy_node.query_with_retry("SELECT * FROM system.zookeeper WHERE path='/'") + dummy_node.query("SYSTEM SYNC DATABASE REPLICA test_sync_database") assert dummy_node.query( @@ -834,3 +837,30 @@ def test_sync_replica(started_cluster): assert main_node.query( "SELECT count() FROM system.tables where database='test_sync_database'" ).strip() == str(number_of_tables) + + engine_settings = {"default_table_engine": "ReplicatedMergeTree"} + dummy_node.query( + "CREATE TABLE test_sync_database.table (n int, primary key n) partition by n", + settings=engine_settings, + ) + main_node.query("INSERT INTO test_sync_database.table SELECT * FROM numbers(10)") + dummy_node.query("TRUNCATE TABLE test_sync_database.table", settings=settings) + dummy_node.query( + "ALTER TABLE test_sync_database.table ADD COLUMN m int", settings=settings + ) + + main_node.query( + "SYSTEM SYNC DATABASE REPLICA ON CLUSTER test_sync_database test_sync_database" + ) + + lp1 = main_node.query( + "select value from system.zookeeper where path='/clickhouse/databases/test1/replicas/shard1|replica1' and name='log_ptr'" + ) + lp2 = main_node.query( + "select value from system.zookeeper where path='/clickhouse/databases/test1/replicas/shard1|replica2' and name='log_ptr'" + ) + max_lp = main_node.query( + "select value from system.zookeeper where path='/clickhouse/databases/test1/' and name='max_log_ptr'" + ) + assert lp1 == max_lp + assert lp2 == max_lp diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index c0ebfc7a070..5a2f80c5e28 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -409,45 +409,6 @@ def test_reload_users_xml_by_timer(): def test_introspection(): policies = [ - [ - "another ON mydb.filtered_table1", - "another", - "mydb", - "filtered_table1", - "6068883a-0e9d-f802-7e22-0144f8e66d3c", - "users.xml", - "1", - 0, - 0, - "['another']", - "[]", - ], - [ - "another ON mydb.filtered_table2", - "another", - "mydb", - "filtered_table2", - "c019e957-c60b-d54e-cc52-7c90dac5fb01", - "users.xml", - "1", - 0, - 0, - "['another']", - "[]", - ], - [ - "another ON mydb.filtered_table3", - "another", - "mydb", - "filtered_table3", - "4cb080d0-44e8-dbef-6026-346655143628", - "users.xml", - "1", - 0, - 0, - "['another']", - "[]", - ], [ "another ON mydb.local", "another", @@ -500,19 +461,6 @@ def test_introspection(): "['default']", "[]", ], - [ - "default ON mydb.local", - "default", - "mydb", - "local", - "cdacaeb5-1d97-f99d-2bb0-4574f290629c", - "users.xml", - "1", - 0, - 0, - "['default']", - "[]", - ], ] assert node.query( "SELECT * from system.row_policies ORDER BY short_name, database, table" @@ -522,31 +470,21 @@ def test_introspection(): def test_dcl_introspection(): assert node.query("SHOW POLICIES") == TSV( [ - "another ON mydb.filtered_table1", - "another ON mydb.filtered_table2", - "another ON mydb.filtered_table3", "another ON mydb.local", "default ON mydb.filtered_table1", "default ON mydb.filtered_table2", "default ON mydb.filtered_table3", - "default ON mydb.local", ] ) - assert node.query("SHOW POLICIES ON mydb.filtered_table1") == TSV( - ["another", "default"] - ) - assert node.query("SHOW POLICIES ON mydb.local") == TSV(["another", "default"]) + assert node.query("SHOW POLICIES ON mydb.filtered_table1") == TSV(["default"]) + assert node.query("SHOW POLICIES ON mydb.local") == TSV(["another"]) assert node.query("SHOW POLICIES ON mydb.*") == TSV( [ - "another ON mydb.filtered_table1", - "another ON mydb.filtered_table2", - "another ON mydb.filtered_table3", "another ON mydb.local", "default ON mydb.filtered_table1", "default ON mydb.filtered_table2", "default ON mydb.filtered_table3", - "default ON mydb.local", ] ) assert node.query("SHOW POLICIES default") == TSV( @@ -554,7 +492,6 @@ def test_dcl_introspection(): "default ON mydb.filtered_table1", "default ON mydb.filtered_table2", "default ON mydb.filtered_table3", - "default ON mydb.local", ] ) @@ -570,9 +507,8 @@ def test_dcl_introspection(): node.query("SHOW CREATE POLICY default ON mydb.filtered_table3") == "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default\n" ) - assert ( - node.query("SHOW CREATE POLICY default ON mydb.local") - == "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default\n" + assert "no row policy" in node.query_and_get_error( + "SHOW CREATE POLICY default ON mydb.local" ) assert node.query("SHOW CREATE POLICY default") == TSV( @@ -580,58 +516,41 @@ def test_dcl_introspection(): "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default", "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default", "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default", - "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default", ] ) assert node.query("SHOW CREATE POLICIES ON mydb.filtered_table1") == TSV( [ - "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another", "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default", ] ) assert node.query("SHOW CREATE POLICIES ON mydb.*") == TSV( [ - "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another", - "CREATE ROW POLICY another ON mydb.filtered_table2 FOR SELECT USING 1 TO another", - "CREATE ROW POLICY another ON mydb.filtered_table3 FOR SELECT USING 1 TO another", "CREATE ROW POLICY another ON mydb.local FOR SELECT USING a = 1 TO another", "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default", "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default", "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default", - "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default", ] ) assert node.query("SHOW CREATE POLICIES") == TSV( [ - "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another", - "CREATE ROW POLICY another ON mydb.filtered_table2 FOR SELECT USING 1 TO another", - "CREATE ROW POLICY another ON mydb.filtered_table3 FOR SELECT USING 1 TO another", "CREATE ROW POLICY another ON mydb.local FOR SELECT USING a = 1 TO another", "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default", "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default", "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default", - "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default", ] ) expected_access = ( - "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another\n" - "CREATE ROW POLICY another ON mydb.filtered_table2 FOR SELECT USING 1 TO another\n" - "CREATE ROW POLICY another ON mydb.filtered_table3 FOR SELECT USING 1 TO another\n" "CREATE ROW POLICY another ON mydb.local FOR SELECT USING a = 1 TO another\n" "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default\n" "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default\n" "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default\n" - "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default\n" ) assert expected_access in node.query("SHOW ACCESS") copy_policy_xml("all_rows.xml") assert node.query("SHOW POLICIES") == TSV( [ - "another ON mydb.filtered_table1", - "another ON mydb.filtered_table2", - "another ON mydb.filtered_table3", "default ON mydb.filtered_table1", "default ON mydb.filtered_table2", "default ON mydb.filtered_table3", @@ -653,9 +572,6 @@ def test_dcl_introspection(): copy_policy_xml("no_rows.xml") assert node.query("SHOW POLICIES") == TSV( [ - "another ON mydb.filtered_table1", - "another ON mydb.filtered_table2", - "another ON mydb.filtered_table3", "default ON mydb.filtered_table1", "default ON mydb.filtered_table2", "default ON mydb.filtered_table3", @@ -683,7 +599,9 @@ def test_dcl_management(): assert node.query("SHOW POLICIES") == "" node.query("CREATE POLICY pA ON mydb.filtered_table1 FOR SELECT USING a= b for X, none for Y + node.query( + "CREATE POLICY pA ON mydb.filtered_table1 FOR SELECT USING a >= b AS permissive TO X" + ) + assert node.query("SELECT * FROM mydb.filtered_table1", user="X") == TSV( + [[0, 0], [1, 0], [1, 1]] + ) + assert node.query("SELECT * FROM mydb.filtered_table1", user="Y") == TSV( + [[0, 0], [0, 1], [1, 0], [1, 1]] + ) + + # restrictive a >=b for X, none for Y + node.query("ALTER POLICY pA ON mydb.filtered_table1 AS restrictive") + assert node.query("SELECT * FROM mydb.filtered_table1", user="X") == TSV( + [[0, 0], [1, 0], [1, 1]] + ) + assert node.query("SELECT * FROM mydb.filtered_table1", user="Y") == TSV( + [[0, 0], [0, 1], [1, 0], [1, 1]] + ) + + # permissive a >= b for X, restrictive a <= b for X, none for Y + node.query("ALTER POLICY pA ON mydb.filtered_table1 AS permissive") + node.query( + "CREATE POLICY pB ON mydb.filtered_table1 FOR SELECT USING a <= b AS restrictive TO X" + ) + assert node.query("SELECT * FROM mydb.filtered_table1", user="X") == TSV( + [[0, 0], [1, 1]] + ) + assert node.query("SELECT * FROM mydb.filtered_table1", user="Y") == TSV( + [[0, 0], [0, 1], [1, 0], [1, 1]] + ) + + # permissive a >= b for X, restrictive a <= b for Y + node.query("ALTER POLICY pB ON mydb.filtered_table1 TO Y") + assert node.query("SELECT * FROM mydb.filtered_table1", user="X") == TSV( + [[0, 0], [1, 0], [1, 1]] + ) + assert node.query("SELECT * FROM mydb.filtered_table1", user="Y") == TSV( + [[0, 0], [0, 1], [1, 1]] + ) + + node.query("DROP POLICY pA, pB ON mydb.filtered_table1") + node.query("DROP USER X, Y") + + def test_tags_with_db_and_table_names(): copy_policy_xml("tags_with_db_and_table_names.xml") diff --git a/tests/integration/test_s3_cluster/configs/cluster.xml b/tests/integration/test_s3_cluster/configs/cluster.xml index 404a15b1273..18f15763633 100644 --- a/tests/integration/test_s3_cluster/configs/cluster.xml +++ b/tests/integration/test_s3_cluster/configs/cluster.xml @@ -21,4 +21,7 @@ + + cluster_simple + \ No newline at end of file diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index 93708acd49c..2cbb36fcf06 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -95,6 +95,29 @@ def test_count(started_cluster): assert TSV(pure_s3) == TSV(s3_distibuted) +def test_count_macro(started_cluster): + node = started_cluster.instances["s0_0_0"] + + s3_macro = node.query( + """ + SELECT count(*) from s3Cluster( + '{default_cluster_macro}', 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')""" + ) + # print(s3_distibuted) + s3_distibuted = node.query( + """ + SELECT count(*) from s3Cluster( + 'cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')""" + ) + # print(s3_distibuted) + + assert TSV(s3_macro) == TSV(s3_distibuted) + + def test_union_all(started_cluster): node = started_cluster.instances["s0_0_0"] pure_s3 = node.query( diff --git a/tests/integration/test_settings_constraints_distributed/test.py b/tests/integration/test_settings_constraints_distributed/test.py index 75d4732ffc4..aed522bb5d1 100644 --- a/tests/integration/test_settings_constraints_distributed/test.py +++ b/tests/integration/test_settings_constraints_distributed/test.py @@ -100,29 +100,29 @@ def test_select_clamps_settings(): # Check that shards clamp passed settings. query = "SELECT hostName() as host, name, value FROM shard_settings WHERE name = 'max_memory_usage' OR name = 'readonly' ORDER BY host, name, value" assert ( - distributed.query(query) == "node1\tmax_memory_usage\t99999999\n" + distributed.query(query) == "node1\tmax_memory_usage\t50000000\n" "node1\treadonly\t0\n" - "node2\tmax_memory_usage\t10000000000\n" + "node2\tmax_memory_usage\t0\n" "node2\treadonly\t1\n" ) assert ( distributed.query(query, user="normal") == "node1\tmax_memory_usage\t80000000\n" "node1\treadonly\t0\n" - "node2\tmax_memory_usage\t10000000000\n" + "node2\tmax_memory_usage\t0\n" "node2\treadonly\t1\n" ) assert ( distributed.query(query, user="wasteful") == "node1\tmax_memory_usage\t99999999\n" "node1\treadonly\t0\n" - "node2\tmax_memory_usage\t10000000000\n" + "node2\tmax_memory_usage\t0\n" "node2\treadonly\t1\n" ) assert ( distributed.query(query, user="readonly") - == "node1\tmax_memory_usage\t99999999\n" + == "node1\tmax_memory_usage\t50000000\n" "node1\treadonly\t1\n" - "node2\tmax_memory_usage\t10000000000\n" + "node2\tmax_memory_usage\t0\n" "node2\treadonly\t1\n" ) @@ -130,14 +130,14 @@ def test_select_clamps_settings(): distributed.query(query, settings={"max_memory_usage": 1}) == "node1\tmax_memory_usage\t11111111\n" "node1\treadonly\t0\n" - "node2\tmax_memory_usage\t10000000000\n" + "node2\tmax_memory_usage\t0\n" "node2\treadonly\t1\n" ) assert ( distributed.query(query, settings={"max_memory_usage": 40000000, "readonly": 2}) == "node1\tmax_memory_usage\t40000000\n" "node1\treadonly\t2\n" - "node2\tmax_memory_usage\t10000000000\n" + "node2\tmax_memory_usage\t0\n" "node2\treadonly\t1\n" ) assert ( @@ -146,7 +146,7 @@ def test_select_clamps_settings(): ) == "node1\tmax_memory_usage\t99999999\n" "node1\treadonly\t2\n" - "node2\tmax_memory_usage\t10000000000\n" + "node2\tmax_memory_usage\t0\n" "node2\treadonly\t1\n" ) diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index b4c0cec9f48..a90273e7a7a 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -115,7 +115,7 @@ def test_smoke(): "SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin", ) - == "10000000000\n" + == "0\n" ) instance.query("SET max_memory_usage = 80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") @@ -158,7 +158,7 @@ def test_smoke(): "SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin", ) - == "10000000000\n" + == "0\n" ) instance.query("SET max_memory_usage = 80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") @@ -228,7 +228,7 @@ def test_settings_from_granted_role(): "SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin", ) - == "10000000000\n" + == "0\n" ) instance.query("SET max_memory_usage = 120000000", user="robin") @@ -240,7 +240,7 @@ def test_settings_from_granted_role(): "SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin", ) - == "10000000000\n" + == "0\n" ) instance.query("SET max_memory_usage = 120000000", user="robin") assert system_settings_profile_elements(role_name="worker") == [] @@ -278,7 +278,7 @@ def test_settings_from_granted_role(): "SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin", ) - == "10000000000\n" + == "0\n" ) instance.query("SET max_memory_usage = 120000000", user="robin") assert system_settings_profile("xyz") == [ @@ -360,7 +360,7 @@ def test_alter_and_drop(): "SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin", ) - == "10000000000\n" + == "0\n" ) instance.query("SET max_memory_usage = 80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") @@ -374,17 +374,17 @@ def test_show_profiles(): assert instance.query("SHOW CREATE PROFILE xyz") == "CREATE SETTINGS PROFILE xyz\n" assert ( instance.query("SHOW CREATE SETTINGS PROFILE default") - == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = \\'random\\'\n" + == "CREATE SETTINGS PROFILE default SETTINGS load_balancing = \\'random\\'\n" ) assert ( instance.query("SHOW CREATE PROFILES") - == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = \\'random\\'\n" + == "CREATE SETTINGS PROFILE default SETTINGS load_balancing = \\'random\\'\n" "CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n" "CREATE SETTINGS PROFILE xyz\n" ) expected_access = ( - "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = \\'random\\'\n" + "CREATE SETTINGS PROFILE default SETTINGS load_balancing = \\'random\\'\n" "CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n" "CREATE SETTINGS PROFILE xyz\n" ) diff --git a/tests/integration/test_storage_hdfs/configs/macro.xml b/tests/integration/test_storage_hdfs/configs/macro.xml new file mode 100644 index 00000000000..c2e11b47a5e --- /dev/null +++ b/tests/integration/test_storage_hdfs/configs/macro.xml @@ -0,0 +1,5 @@ + + + test_cluster_two_shards + + \ No newline at end of file diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index bd9e61a9422..0490c0c1f0d 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -2,10 +2,13 @@ import os import pytest from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV from pyhdfs import HdfsClient cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance("node1", with_hdfs=True) +node1 = cluster.add_instance( + "node1", main_configs=["configs/macro.xml"], with_hdfs=True +) @pytest.fixture(scope="module") @@ -589,6 +592,22 @@ def test_cluster_join(started_cluster): assert "AMBIGUOUS_COLUMN_NAME" not in result +def test_cluster_macro(started_cluster): + with_macro = node1.query( + """ + SELECT id FROM hdfsCluster('{default_cluster_macro}', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') + """ + ) + + no_macro = node1.query( + """ + SELECT id FROM hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') + """ + ) + + assert TSV(with_macro) == TSV(no_macro) + + def test_virtual_columns_2(started_cluster): hdfs_api = started_cluster.hdfs_api diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index a27b5a134e4..166ba773e27 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -570,13 +570,12 @@ def test_kafka_formats(kafka_cluster): b"\x05\x01\x02\x69\x64\x05\x49\x6e\x74\x36\x34\x00\x00\x00\x00\x00\x00\x00\x00\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x06\x55\x49\x6e\x74\x31\x36\x00\x00\x04\x76\x61\x6c\x31\x06\x53\x74\x72\x69\x6e\x67\x02\x41\x4d\x04\x76\x61\x6c\x32\x07\x46\x6c\x6f\x61\x74\x33\x32\x00\x00\x00\x3f\x04\x76\x61\x6c\x33\x05\x55\x49\x6e\x74\x38\x01", # '' # On empty message exception happens: DB::Exception: Attempt to read after eof - # /src/IO/VarInt.h:122: DB::throwReadAfterEOF() @ 0x15c34487 in /usr/bin/clickhouse - # /src/IO/VarInt.h:135: void DB::readVarUIntImpl(unsigned long&, DB::ReadBuffer&) @ 0x15c68bb7 in /usr/bin/clickhouse - # /src/IO/VarInt.h:149: DB::readVarUInt(unsigned long&, DB::ReadBuffer&) @ 0x15c68844 in /usr/bin/clickhouse - # /src/DataStreams/NativeBlockInputStream.cpp:124: DB::NativeBlockInputStream::readImpl() @ 0x1d3e2778 in /usr/bin/clickhouse - # /src/DataStreams/IBlockInputStream.cpp:60: DB::IBlockInputStream::read() @ 0x1c9c92fd in /usr/bin/clickhouse - # /src/Processors/Formats/Impl/NativeFormat.h:42: DB::NativeInputFormatFromNativeBlockInputStream::generate() @ 0x1df1ea79 in /usr/bin/clickhouse - # /src/Processors/ISource.cpp:48: DB::ISource::work() @ 0x1dd79737 in /usr/bin/clickhouse + # 1. DB::throwReadAfterEOF() @ 0xb76449b in /usr/bin/clickhouse + # 2. ? @ 0xb79cb0b in /usr/bin/clickhouse + # 3. DB::NativeReader::read() @ 0x16e7a084 in /usr/bin/clickhouse + # 4. DB::NativeInputFormat::generate() @ 0x16f76922 in /usr/bin/clickhouse + # 5. DB::ISource::tryGenerate() @ 0x16e90bd5 in /usr/bin/clickhouse + # 6. DB::ISource::work() @ 0x16e9087a in /usr/bin/clickhouse ], }, "MsgPack": { @@ -4051,7 +4050,7 @@ def test_issue26643(kafka_cluster): kafka_num_consumers = 4, kafka_skip_broken_messages = 10000; - SET allow_suspicious_low_cardinality_types=1; + SET allow_suspicious_low_cardinality_types=1; CREATE TABLE test.log ( @@ -4095,7 +4094,7 @@ def test_issue26643(kafka_cluster): expected = """\ 2021-08-15 07:00:00 server1 443 50000 ['1','2'] [0,0] [444,0] [123123,0] ['adsfasd',''] GET -2021-08-15 07:00:02 0 0 [] [] [] [] [] +2021-08-15 07:00:02 0 0 [] [] [] [] [] 2021-08-15 07:00:02 0 0 [] [] [] [] [] """ assert TSV(result) == TSV(expected) diff --git a/tests/integration/test_storage_mongodb/test.py b/tests/integration/test_storage_mongodb/test.py index 6b54ce0a847..d8ca207d0a6 100644 --- a/tests/integration/test_storage_mongodb/test.py +++ b/tests/integration/test_storage_mongodb/test.py @@ -173,6 +173,7 @@ def test_predefined_connection_configuration(started_cluster): simple_mongo_table.insert_many(data) node = started_cluster.instances["node"] + node.query("drop table if exists simple_mongo_table") node.query( "create table simple_mongo_table(key UInt64, data String) engine = MongoDB(mongo1)" ) diff --git a/tests/performance/jit_sort.xml b/tests/performance/jit_sort.xml new file mode 100644 index 00000000000..f99124a360b --- /dev/null +++ b/tests/performance/jit_sort.xml @@ -0,0 +1,122 @@ + + + hits_100m_single + + + + CREATE TABLE jit_test_memory ( + key UInt64, + value_1 UInt64, + value_2 UInt64, + value_3 UInt64, + value_4 UInt64, + value_5 UInt64 + ) Engine = Memory + + + + CREATE TABLE jit_test_merge_tree ( + key UInt64, + value_1 UInt64, + value_2 UInt64, + value_3 UInt64, + value_4 UInt64, + value_5 UInt64 + ) Engine = MergeTree + ORDER BY key + + + + CREATE TABLE jit_test_memory_nullable ( + key UInt64, + value_1 Nullable(UInt64), + value_2 Nullable(UInt64), + value_3 Nullable(UInt64), + value_4 Nullable(UInt64), + value_5 Nullable(UInt64) + ) Engine = Memory + + + + CREATE TABLE jit_test_merge_tree_nullable ( + key UInt64, + value_1 Nullable(UInt64), + value_2 Nullable(UInt64), + value_3 Nullable(UInt64), + value_4 Nullable(UInt64), + value_5 Nullable(UInt64) + ) Engine = MergeTree + ORDER BY key + + + + + table + + jit_test_memory + jit_test_merge_tree + jit_test_memory_nullable + jit_test_merge_tree_nullable + + + + + + INSERT INTO {table} + SELECT + number, + rand64(0), + rand64(1), + rand64(2), + rand64(3), + rand64(4) + FROM + system.numbers_mt + LIMIT 10000000 + + + + SELECT + * + FROM {table} + ORDER BY value_1 ASC + FORMAT Null + + + + SELECT + * + FROM {table} + ORDER BY value_1 ASC, value_2 DESC, value_3 ASC + FORMAT Null + + + + SELECT + * + FROM {table} + ORDER BY value_1 ASC, value_2 DESC, value_3 ASC, value_4 DESC, value_5 ASC + FORMAT Null + + + + SELECT + WatchID + FROM hits_100m_single + ORDER BY WatchID ASC, CounterID DESC, ClientIP ASC + LIMIT 2000000 + FORMAT Null + + + + SELECT + WatchID + FROM hits_100m_single + ORDER BY WatchID ASC, CounterID DESC, ClientIP ASC, IPNetworkID DESC, SearchEngineID ASC + LIMIT 2000000 + FORMAT Null + + + DROP TABLE IF EXISTS {table} + + diff --git a/tests/queries/0_stateless/00558_aggregate_merge_totals_with_arenas.sql b/tests/queries/0_stateless/00558_aggregate_merge_totals_with_arenas.sql index 1b7c9df2d24..1b5384a2e7d 100644 --- a/tests/queries/0_stateless/00558_aggregate_merge_totals_with_arenas.sql +++ b/tests/queries/0_stateless/00558_aggregate_merge_totals_with_arenas.sql @@ -38,7 +38,7 @@ ORDER BY k; * And we use function 'anyLast' for method IAggregateFunction::merge to be called for every new value. * * We use useless HAVING (that is always true), because in absense of HAVING, - * TOTALS are calculated in a simple way in same pass during aggregation, not in TotalsHavingBlockInputStream, + * TOTALS are calculated in a simple way in same pass during aggregation, not in TotalsHavingTransform, * and bug doesn't trigger. * * We use ORDER BY for result of the test to be deterministic. diff --git a/tests/queries/0_stateless/00688_low_cardinality_serialization.reference b/tests/queries/0_stateless/00688_low_cardinality_serialization.reference index aaa2efb51e6..a19bbc95e56 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_serialization.reference +++ b/tests/queries/0_stateless/00688_low_cardinality_serialization.reference @@ -1,4 +1,4 @@ -NativeBlockInputStream +NativeReader LowCardinality(String) 123_a 2 LowCardinality(String) 123_bb 3 123_ LowCardinality(String) 123_ 1 123_a diff --git a/tests/queries/0_stateless/00688_low_cardinality_serialization.sql b/tests/queries/0_stateless/00688_low_cardinality_serialization.sql index e62d824e011..34353286118 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_serialization.sql +++ b/tests/queries/0_stateless/00688_low_cardinality_serialization.sql @@ -1,6 +1,6 @@ -- Tags: no-parallel -select 'NativeBlockInputStream'; +select 'NativeReader'; select toTypeName(dict), dict, lowCardinalityIndices(dict), lowCardinalityKeys(dict) from (select '123_' || toLowCardinality(v) as dict from (select arrayJoin(['a', 'bb', '', 'a', 'ccc', 'a', 'bb', '', 'dddd']) as v)); select '-'; select toTypeName(dict), dict, lowCardinalityIndices(dict), lowCardinalityKeys(dict) from (select '123_' || toLowCardinality(v) as dict from (select arrayJoin(['a', Null, 'bb', '', 'a', Null, 'ccc', 'a', 'bb', '', 'dddd']) as v)); diff --git a/tests/queries/0_stateless/00738_lock_for_inner_table.sh b/tests/queries/0_stateless/00738_lock_for_inner_table.sh index b28d1b4fa20..2f07fbacff0 100755 --- a/tests/queries/0_stateless/00738_lock_for_inner_table.sh +++ b/tests/queries/0_stateless/00738_lock_for_inner_table.sh @@ -29,7 +29,7 @@ function wait_for_query_to_start() while [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'insert_$CLICKHOUSE_DATABASE'") == 0 ]]; do sleep 0.001; done # The query is already started, but there is no guarantee that it locks the underlying table already. - # Wait until PushingToViewsBlockOutputStream will acquire the lock of the underlying table for the INSERT query. + # Wait until PushingToViews chain will acquire the lock of the underlying table for the INSERT query. # (assume that 0.5 second is enough for this, but this is not 100% correct) sleep 0.5 diff --git a/tests/queries/0_stateless/01023_materialized_view_query_context.sql b/tests/queries/0_stateless/01023_materialized_view_query_context.sql index 415690ae1e5..4f03a8e7e0f 100644 --- a/tests/queries/0_stateless/01023_materialized_view_query_context.sql +++ b/tests/queries/0_stateless/01023_materialized_view_query_context.sql @@ -1,7 +1,7 @@ -- Tags: no-parallel -- Create dictionary, since dictGet*() uses DB::Context in executeImpl() --- (To cover scope of the Context in DB::PushingToViewsBlockOutputStream::process) +-- (To cover scope of the Context in PushingToViews chain) set insert_distributed_sync=1; diff --git a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.reference b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.reference new file mode 100644 index 00000000000..3bf688404f8 --- /dev/null +++ b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.reference @@ -0,0 +1,4 @@ +1 4999999950000000 2 +2 4999999950000000 2 +3 49999995000000 2 +4 49999995000000 2 diff --git a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql new file mode 100644 index 00000000000..001e758284f --- /dev/null +++ b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql @@ -0,0 +1,15 @@ +drop table if exists t; +create table t(n int, a Int64, s String) engine = MergeTree() order by a; + +set optimize_trivial_insert_select=1; + +-- due to aggregate functions, optimize_trivial_insert_select will not be applied +insert into t select 1, sum(number) as c, getSetting('max_threads') from numbers_mt(100000000) settings max_insert_threads=4, max_threads=2; +-- due to GROUP BY, optimize_trivial_insert_select will not be applied +insert into t select 2, sum(number) as c, getSetting('max_threads') from numbers_mt(100000000) group by 1 settings max_insert_threads=4, max_threads=2; +insert into t select 3, sum(number) as c, getSetting('max_threads') from numbers_mt(10000000) group by 3 settings max_insert_threads=4, max_threads=2; +insert into t select 4, sum(number) as c, getSetting('max_threads') as mt from numbers_mt(10000000) group by mt settings max_insert_threads=4, max_threads=2; + +select n, a, s from t order by n; + +drop table t; diff --git a/tests/queries/0_stateless/01166_truncate_multiple_partitions.reference b/tests/queries/0_stateless/01166_truncate_multiple_partitions.reference new file mode 100644 index 00000000000..27f26b32e1e --- /dev/null +++ b/tests/queries/0_stateless/01166_truncate_multiple_partitions.reference @@ -0,0 +1,8 @@ +20 190 +0 0 +8 52 +0 0 +20 190 +0 0 +8 52 +0 0 diff --git a/tests/queries/0_stateless/01166_truncate_multiple_partitions.sql b/tests/queries/0_stateless/01166_truncate_multiple_partitions.sql new file mode 100644 index 00000000000..1a7e3ed3bc4 --- /dev/null +++ b/tests/queries/0_stateless/01166_truncate_multiple_partitions.sql @@ -0,0 +1,31 @@ +drop table if exists trunc; + +set default_table_engine='ReplicatedMergeTree'; +create table trunc (n int, primary key n) engine=ReplicatedMergeTree('/test/1166/{database}', '1') partition by n % 10; +insert into trunc select * from numbers(20); +select count(), sum(n) from trunc; +alter table trunc detach partition all; +select count(), sum(n) from trunc; +alter table trunc attach partition id '0'; +alter table trunc attach partition id '1'; +alter table trunc attach partition id '2'; +alter table trunc attach partition id '3'; +select count(), sum(n) from trunc; +truncate trunc; +select count(), sum(n) from trunc; +drop table trunc; + +set default_table_engine='MergeTree'; +create table trunc (n int, primary key n) partition by n % 10; +insert into trunc select * from numbers(20); +select count(), sum(n) from trunc; +alter table trunc detach partition all; +select count(), sum(n) from trunc; +alter table trunc attach partition id '0'; +alter table trunc attach partition id '1'; +alter table trunc attach partition id '2'; +alter table trunc attach partition id '3'; +select count(), sum(n) from trunc; +truncate trunc; +select count(), sum(n) from trunc; +drop table trunc; \ No newline at end of file diff --git a/tests/queries/0_stateless/01268_procfs_metrics.sh b/tests/queries/0_stateless/01268_procfs_metrics.sh index 959d922d801..4f09d197596 100755 --- a/tests/queries/0_stateless/01268_procfs_metrics.sh +++ b/tests/queries/0_stateless/01268_procfs_metrics.sh @@ -17,5 +17,5 @@ truncate -s1025 "$tmp_path" $CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events -q "SELECT * FROM file('$tmp_path', 'LineAsString') FORMAT Null" |& grep -m1 -F -o -e OSReadChars # NOTE: that OSCPUVirtualTimeMicroseconds is in microseconds, so 1e6 is not enough. -$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events -q "SELECT * FROM numbers(10e6) FORMAT Null" |& grep -m1 -F -o -e OSCPUVirtualTimeMicroseconds +$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events -q "SELECT * FROM numbers(1e8) FORMAT Null" |& grep -m1 -F -o -e OSCPUVirtualTimeMicroseconds exit 0 diff --git a/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql b/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql index 59d8605ba1c..58522942641 100644 --- a/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql +++ b/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql @@ -13,7 +13,6 @@ SET log_queries=0; SYSTEM FLUSH LOGS; -- 1 for PullingAsyncPipelineExecutor::pull --- 1 for AsynchronousBlockInputStream SELECT throwIf(count() != 1, 'no query was logged'), throwIf(length(thread_ids) != 2, 'too many threads used') diff --git a/tests/queries/0_stateless/01293_show_clusters.sql b/tests/queries/0_stateless/01293_show_clusters.sql index ad5e51531e3..e1ef8621a16 100644 --- a/tests/queries/0_stateless/01293_show_clusters.sql +++ b/tests/queries/0_stateless/01293_show_clusters.sql @@ -1,3 +1,4 @@ -- don't show all clusters to reduce dependency on the configuration of server +set send_logs_level = 'fatal'; show clusters like 'test_shard%' limit 1; show cluster 'test_shard_localhost'; diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index 75e0ec5ffec..f7ac5457f4a 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -3,6 +3,5 @@ connect_timeout Seconds 10 connect_timeout_with_failover_ms Milliseconds 2000 connect_timeout_with_failover_secure_ms Milliseconds 3000 external_storage_connect_timeout_sec UInt64 10 -max_memory_usage UInt64 10000000000 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 diff --git a/tests/queries/0_stateless/01461_query_start_time_microseconds.sql b/tests/queries/0_stateless/01461_query_start_time_microseconds.sql index be1d9897053..9f78a1f95e1 100644 --- a/tests/queries/0_stateless/01461_query_start_time_microseconds.sql +++ b/tests/queries/0_stateless/01461_query_start_time_microseconds.sql @@ -23,6 +23,7 @@ WITH ( ) AS t) SELECT if(dateDiff('second', toDateTime(time_with_microseconds), toDateTime(t)) = 0, 'ok', 'fail'); -- +SET log_query_threads = 1; SELECT '01461_query_thread_log_query_start_time_milliseconds_test'; SYSTEM FLUSH LOGS; -- assumes that the query_start_time field is already accurate. diff --git a/tests/queries/0_stateless/01473_event_time_microseconds.sql b/tests/queries/0_stateless/01473_event_time_microseconds.sql index 932acf48cc4..2b443cf82eb 100644 --- a/tests/queries/0_stateless/01473_event_time_microseconds.sql +++ b/tests/queries/0_stateless/01473_event_time_microseconds.sql @@ -6,6 +6,7 @@ -- Refer: tests/integration/test_asynchronous_metric_log_table. SET log_queries = 1; +SET log_query_threads = 1; SET query_profiler_real_time_period_ns = 100000000; -- a long enough query to trigger the query profiler and to record trace log SELECT sleep(2) FORMAT Null; diff --git a/tests/queries/0_stateless/01586_columns_pruning.sql b/tests/queries/0_stateless/01586_columns_pruning.sql index 4ae6b6c7c10..598e64b9fe4 100644 --- a/tests/queries/0_stateless/01586_columns_pruning.sql +++ b/tests/queries/0_stateless/01586_columns_pruning.sql @@ -1,3 +1,5 @@ +SET max_memory_usage = 10000000000; + -- Unneeded column is removed from subquery. SELECT count() FROM (SELECT number, groupArray(repeat(toString(number), 1000000)) FROM numbers(1000000) GROUP BY number); -- Unneeded column cannot be removed from subquery and the query is out of memory diff --git a/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference b/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference index 9f376fb3e4f..f3be69d3279 100644 --- a/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference +++ b/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference @@ -1,5 +1,5 @@ - DistributedBlockOutputStream: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 1)), implicit conversion will be done. - DistributedBlockOutputStream: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 1)), implicit conversion will be done. + DistributedSink: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 1)), implicit conversion will be done. + DistributedSink: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 1)), implicit conversion will be done. default.dist_01683.DirectoryMonitor: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 0)), implicit conversion will be done default.dist_01683.DirectoryMonitor: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 0)), implicit conversion will be done 1 diff --git a/tests/queries/0_stateless/01883_grouping_sets_crash.reference b/tests/queries/0_stateless/01883_grouping_sets_crash.reference new file mode 100644 index 00000000000..4d9e967b766 --- /dev/null +++ b/tests/queries/0_stateless/01883_grouping_sets_crash.reference @@ -0,0 +1,209 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +SECOND QUERY: +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +THIRD QUERY: +\N 1 0 0 +\N 2 0 0 +\N 3 0 0 +\N 4 0 0 +\N 5 0 0 +\N 1 0 0 +\N 2 0 0 +\N 3 0 0 +\N 4 0 0 +\N 5 0 0 +\N 0 10 10 +\N 0 9 9 +\N 0 8 8 +\N 0 7 7 +\N 0 6 6 +\N 0 5 5 +\N 0 4 4 +\N 0 3 3 +\N 0 2 2 +\N 0 1 1 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 diff --git a/tests/queries/0_stateless/01883_grouping_sets_crash.sql b/tests/queries/0_stateless/01883_grouping_sets_crash.sql new file mode 100644 index 00000000000..cf56c8546ce --- /dev/null +++ b/tests/queries/0_stateless/01883_grouping_sets_crash.sql @@ -0,0 +1,99 @@ +DROP TABLE IF EXISTS grouping_sets; + +CREATE TABLE grouping_sets(fact_1_id Int32, fact_2_id Int32, fact_3_id Int32, fact_4_id Int32, sales_value Int32) ENGINE = Memory; + +INSERT INTO grouping_sets +SELECT + number % 2 + 1 AS fact_1_id, + number % 5 + 1 AS fact_2_id, + number % 10 + 1 AS fact_3_id, + number % 10 + 1 AS fact_4_id, + number % 100 AS sales_value +FROM system.numbers limit 1000; + +SELECT + fact_3_id, + fact_4_id +FROM grouping_sets +GROUP BY + GROUPING SETS ( + ('wo\0ldworldwo\0ldworld'), + (fact_3_id, fact_4_id)) +ORDER BY + fact_3_id, fact_4_id; + +SELECT 'SECOND QUERY:'; + +SELECT + fact_3_id, + fact_4_id +FROM grouping_sets +GROUP BY + GROUPING SETS ( + (fact_1_id, fact_2_id), + ((-9223372036854775808, NULL, (tuple(1.), (tuple(1.), 1048576), 65535))), + ((tuple(3.4028234663852886e38), (tuple(1024), -2147483647), NULL)), + (fact_3_id, fact_4_id)) +ORDER BY + (NULL, ('256', (tuple(NULL), NULL), NULL, NULL), NULL) ASC, + fact_1_id DESC NULLS FIRST, + fact_2_id DESC NULLS FIRST, + fact_4_id ASC; + +SELECT 'THIRD QUERY:'; + +SELECT + extractAllGroups(NULL, 'worldworldworldwo\0ldworldworldworldwo\0ld'), + fact_2_id, + fact_3_id, + fact_4_id +FROM grouping_sets +GROUP BY + GROUPING SETS ( + (sales_value), + (fact_1_id, fact_2_id), + ('wo\0ldworldwo\0ldworld'), + (fact_3_id, fact_4_id)) +ORDER BY + fact_1_id DESC NULLS LAST, + fact_1_id DESC NULLS FIRST, + fact_2_id ASC, + fact_3_id DESC NULLS FIRST, + fact_4_id ASC; + +SELECT fact_3_id +FROM grouping_sets +GROUP BY + GROUPING SETS ((fact_3_id, fact_4_id)) +ORDER BY fact_3_id ASC; + +-- Following two queries were fuzzed +SELECT 'w\0\0ldworldwo\0l\0world' +FROM grouping_sets +GROUP BY + GROUPING SETS ( + ( fact_4_id), + ( NULL), + ( fact_3_id, fact_4_id)) +ORDER BY + NULL ASC, + NULL DESC NULLS FIRST, + fact_3_id ASC, + fact_3_id ASC NULLS LAST, + 'wo\0ldworldwo\0ldworld' ASC NULLS LAST, + 'w\0\0ldworldwo\0l\0world' DESC NULLS FIRST, + 'wo\0ldworldwo\0ldworld' ASC, + NULL ASC NULLS FIRST, + fact_4_id DESC NULLS LAST; + +SELECT fact_3_id +FROM grouping_sets +GROUP BY + GROUPING SETS ( + ( 'wo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworld'), + ( NULL), + ( fact_4_id), + ( fact_3_id, fact_4_id)) +ORDER BY fact_3_id ASC NULLS FIRST; + +DROP TABLE IF EXISTS grouping_sets; \ No newline at end of file diff --git a/tests/queries/0_stateless/01883_with_grouping_sets.reference b/tests/queries/0_stateless/01883_with_grouping_sets.reference new file mode 100644 index 00000000000..a036ccb0796 --- /dev/null +++ b/tests/queries/0_stateless/01883_with_grouping_sets.reference @@ -0,0 +1,208 @@ +(Expression) +ExpressionTransform + (Sorting) + MergingSortedTransform 2 → 1 + MergeSortingTransform × 2 + LimitsCheckingTransform × 2 + PartialSortingTransform × 2 + (Expression) + ExpressionTransform × 2 + (Aggregating) + ExpressionTransform × 2 + AggregatingTransform × 2 + Copy 1 → 2 + (Expression) + ExpressionTransform + (SettingQuotaAndLimits) + (ReadFromStorage) + Memory 0 → 1 +1 0 1 4500 +1 0 3 4700 +1 0 5 4900 +1 0 7 5100 +1 0 9 5300 +1 1 0 4500 +1 2 0 5100 +1 3 0 4700 +1 4 0 5300 +1 5 0 4900 +2 0 2 4600 +2 0 4 4800 +2 0 6 5000 +2 0 8 5200 +2 0 10 5400 +2 1 0 5000 +2 2 0 4600 +2 3 0 5200 +2 4 0 4800 +2 5 0 5400 +0 0 1 1 4500 +0 0 2 2 4600 +0 0 3 3 4700 +0 0 4 4 4800 +0 0 5 5 4900 +0 0 6 6 5000 +0 0 7 7 5100 +0 0 8 8 5200 +0 0 9 9 5300 +0 0 10 10 5400 +1 1 0 0 4500 +1 2 0 0 5100 +1 3 0 0 4700 +1 4 0 0 5300 +1 5 0 0 4900 +2 1 0 0 5000 +2 2 0 0 4600 +2 3 0 0 5200 +2 4 0 0 4800 +2 5 0 0 5400 +0 0 0 49500 +0 0 1 4500 +0 0 2 4600 +0 0 3 4700 +0 0 4 4800 +0 0 5 4900 +0 0 6 5000 +0 0 7 5100 +0 0 8 5200 +0 0 9 5300 +0 0 10 5400 +1 1 0 4500 +1 2 0 5100 +1 3 0 4700 +1 4 0 5300 +1 5 0 4900 +2 1 0 5000 +2 2 0 4600 +2 3 0 5200 +2 4 0 4800 +2 5 0 5400 +(Expression) +ExpressionTransform + (Sorting) + MergingSortedTransform 2 → 1 + MergeSortingTransform × 2 + LimitsCheckingTransform × 2 + PartialSortingTransform × 2 + (Expression) + ExpressionTransform × 2 + (Aggregating) + ExpressionTransform × 2 + Resize × 2 3 → 1 + AggregatingTransform × 6 + Copy × 3 1 → 2 + (Expression) + ExpressionTransform × 3 + (SettingQuotaAndLimits) + (ReadFromStorage) + NumbersMt × 3 0 → 1 +4999500000 10000 +4999510000 10000 +4999520000 10000 +4999530000 10000 +4999540000 10000 +4999550000 10000 +4999560000 10000 +4999570000 10000 +4999580000 10000 +4999590000 10000 +4999600000 10000 +4999610000 10000 +4999620000 10000 +4999630000 10000 +4999640000 10000 +4999650000 10000 +4999660000 10000 +4999670000 10000 +4999680000 10000 +4999690000 10000 +4999700000 10000 +4999710000 10000 +4999720000 10000 +4999730000 10000 +4999740000 10000 +4999750000 10000 +4999760000 10000 +4999770000 10000 +4999780000 10000 +4999790000 10000 +4999800000 10000 +4999810000 10000 +4999820000 10000 +4999830000 10000 +4999840000 10000 +4999850000 10000 +4999860000 10000 +4999870000 10000 +4999880000 10000 +4999890000 10000 +4999900000 10000 +4999910000 10000 +4999920000 10000 +4999930000 10000 +4999940000 10000 +4999950000 10000 +4999960000 10000 +4999970000 10000 +4999980000 10000 +4999990000 10000 +5000000000 10000 +5000010000 10000 +5000020000 10000 +5000030000 10000 +5000040000 10000 +5000050000 10000 +5000060000 10000 +5000070000 10000 +5000080000 10000 +5000090000 10000 +5000100000 10000 +5000110000 10000 +5000120000 10000 +5000130000 10000 +5000140000 10000 +5000150000 10000 +5000160000 10000 +5000170000 10000 +5000180000 10000 +5000190000 10000 +5000200000 10000 +5000210000 10000 +5000220000 10000 +5000230000 10000 +5000240000 10000 +5000250000 10000 +5000260000 10000 +5000270000 10000 +5000280000 10000 +5000290000 10000 +5000300000 10000 +5000310000 10000 +5000320000 10000 +5000330000 10000 +5000340000 10000 +5000350000 10000 +5000360000 10000 +5000370000 10000 +5000380000 10000 +5000390000 10000 +5000400000 10000 +5000410000 10000 +5000420000 10000 +5000430000 10000 +5000440000 10000 +5000450000 10000 +5000460000 10000 +5000470000 10000 +5000480000 10000 +5000490000 10000 +49999500000 100000 +49999600000 100000 +49999700000 100000 +49999800000 100000 +49999900000 100000 +50000000000 100000 +50000100000 100000 +50000200000 100000 +50000300000 100000 +50000400000 100000 diff --git a/tests/queries/0_stateless/01883_with_grouping_sets.sql b/tests/queries/0_stateless/01883_with_grouping_sets.sql new file mode 100644 index 00000000000..bf96248e10e --- /dev/null +++ b/tests/queries/0_stateless/01883_with_grouping_sets.sql @@ -0,0 +1,58 @@ +DROP TABLE IF EXISTS grouping_sets; + +CREATE TABLE grouping_sets(fact_1_id Int32, fact_2_id Int32, fact_3_id Int32, fact_4_id Int32, sales_value Int32) ENGINE = Memory; + +SELECT fact_1_id, fact_3_id, sum(sales_value), count() from grouping_sets GROUP BY GROUPING SETS(fact_1_id, fact_3_id) ORDER BY fact_1_id, fact_3_id; + +INSERT INTO grouping_sets +SELECT + number % 2 + 1 AS fact_1_id, + number % 5 + 1 AS fact_2_id, + number % 10 + 1 AS fact_3_id, + number % 10 + 1 AS fact_4_id, + number % 100 AS sales_value +FROM system.numbers limit 1000; + +EXPLAIN PIPELINE +SELECT fact_1_id, fact_2_id, fact_3_id, SUM(sales_value) AS sales_value from grouping_sets +GROUP BY GROUPING SETS ((fact_1_id, fact_2_id), (fact_1_id, fact_3_id)) +ORDER BY fact_1_id, fact_2_id, fact_3_id; + +SELECT fact_1_id, fact_2_id, fact_3_id, SUM(sales_value) AS sales_value from grouping_sets +GROUP BY GROUPING SETS ((fact_1_id, fact_2_id), (fact_1_id, fact_3_id)) +ORDER BY fact_1_id, fact_2_id, fact_3_id; + +SELECT fact_1_id, fact_2_id, fact_3_id, fact_4_id, SUM(sales_value) AS sales_value from grouping_sets +GROUP BY GROUPING SETS ((fact_1_id, fact_2_id), (fact_3_id, fact_4_id)) +ORDER BY fact_1_id, fact_2_id, fact_3_id, fact_4_id; + +SELECT fact_1_id, fact_2_id, fact_3_id, SUM(sales_value) AS sales_value from grouping_sets +GROUP BY GROUPING SETS ((fact_1_id, fact_2_id), (fact_3_id), ()) +ORDER BY fact_1_id, fact_2_id, fact_3_id; + +SELECT + fact_1_id, + fact_3_id, + SUM(sales_value) AS sales_value +FROM grouping_sets +GROUP BY grouping sets ((fact_1_id), (fact_1_id, fact_3_id)) WITH TOTALS +ORDER BY fact_1_id, fact_3_id; -- { serverError NOT_IMPLEMENTED } + +SELECT + fact_1_id, + fact_3_id, + SUM(sales_value) AS sales_value +FROM grouping_sets +GROUP BY grouping sets (fact_1_id, (fact_1_id, fact_3_id)) WITH TOTALS +ORDER BY fact_1_id, fact_3_id; -- { serverError NOT_IMPLEMENTED } + +DROP TABLE grouping_sets; + +EXPLAIN PIPELINE +SELECT SUM(number) as sum_value, count() AS count_value from numbers_mt(1000000) +GROUP BY GROUPING SETS ((number % 10), (number % 100)) +ORDER BY sum_value, count_value SETTINGS max_threads=3; + +SELECT SUM(number) as sum_value, count() AS count_value from numbers_mt(1000000) +GROUP BY GROUPING SETS ((number % 10), (number % 100)) +ORDER BY sum_value, count_value SETTINGS max_threads=3; diff --git a/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.reference b/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.reference deleted file mode 100644 index 3f71510f3a5..00000000000 --- a/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.reference +++ /dev/null @@ -1,8 +0,0 @@ -4 -1 -2 -3 -3 -3 -3 -4 diff --git a/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.sql b/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.sql deleted file mode 100644 index d0a55c6ba65..00000000000 --- a/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.sql +++ /dev/null @@ -1,32 +0,0 @@ -SET optimize_move_to_prewhere = 1; - -DROP TABLE IF EXISTS 02131_multiply_row_policies_on_same_column; -CREATE TABLE 02131_multiply_row_policies_on_same_column (x UInt8) ENGINE = MergeTree ORDER BY x; -INSERT INTO 02131_multiply_row_policies_on_same_column VALUES (1), (2), (3), (4); - - -DROP ROW POLICY IF EXISTS 02131_filter_1 ON 02131_multiply_row_policies_on_same_column; -DROP ROW POLICY IF EXISTS 02131_filter_2 ON 02131_multiply_row_policies_on_same_column; -DROP ROW POLICY IF EXISTS 02131_filter_3 ON 02131_multiply_row_policies_on_same_column; -SELECT count() FROM 02131_multiply_row_policies_on_same_column; - - -CREATE ROW POLICY 02131_filter_1 ON 02131_multiply_row_policies_on_same_column USING x=1 TO ALL; -SELECT count() FROM 02131_multiply_row_policies_on_same_column; -CREATE ROW POLICY 02131_filter_2 ON 02131_multiply_row_policies_on_same_column USING x=2 TO ALL; -SELECT count() FROM 02131_multiply_row_policies_on_same_column; -CREATE ROW POLICY 02131_filter_3 ON 02131_multiply_row_policies_on_same_column USING x=3 TO ALL; -SELECT count() FROM 02131_multiply_row_policies_on_same_column; -SELECT count() FROM 02131_multiply_row_policies_on_same_column; - - -CREATE ROW POLICY 02131_filter_4 ON 02131_multiply_row_policies_on_same_column USING x<4 AS RESTRICTIVE TO ALL; -SELECT count() FROM 02131_multiply_row_policies_on_same_column; -SELECT count() FROM 02131_multiply_row_policies_on_same_column; - -DROP ROW POLICY 02131_filter_1 ON 02131_multiply_row_policies_on_same_column; -DROP ROW POLICY 02131_filter_2 ON 02131_multiply_row_policies_on_same_column; -DROP ROW POLICY 02131_filter_3 ON 02131_multiply_row_policies_on_same_column; -DROP ROW POLICY 02131_filter_4 ON 02131_multiply_row_policies_on_same_column; -SELECT count() FROM 02131_multiply_row_policies_on_same_column; -DROP TABLE 02131_multiply_row_policies_on_same_column; diff --git a/tests/queries/0_stateless/02131_row_policies_combination.reference b/tests/queries/0_stateless/02131_row_policies_combination.reference new file mode 100644 index 00000000000..b76028d5077 --- /dev/null +++ b/tests/queries/0_stateless/02131_row_policies_combination.reference @@ -0,0 +1,33 @@ +None +1 +2 +3 +4 +R1: x == 1 +1 +R1, R2: (x == 1) OR (x == 2) +1 +2 +R1, R2, R3: (x == 1) OR (x == 2) OR (x == 3) +1 +2 +3 +R1, R2, R3, R4: ((x == 1) OR (x == 2) OR (x == 3)) AND (x <= 2) +1 +2 +R1, R2, R3, R4, R5: ((x == 1) OR (x == 2) OR (x == 3)) AND (x <= 2) AND (x >= 2) +2 +R2, R3, R4, R5: ((x == 2) OR (x == 3)) AND (x <= 2) AND (x >= 2) +2 +R3, R4, R5: (x == 3) AND (x <= 2) AND (x >= 2) +R4, R5: (x <= 2) AND (x >= 2) +2 +R5: (x >= 2) +2 +3 +4 +None +1 +2 +3 +4 diff --git a/tests/queries/0_stateless/02131_row_policies_combination.sql b/tests/queries/0_stateless/02131_row_policies_combination.sql new file mode 100644 index 00000000000..b5be672bb1b --- /dev/null +++ b/tests/queries/0_stateless/02131_row_policies_combination.sql @@ -0,0 +1,54 @@ +DROP TABLE IF EXISTS 02131_rptable; +CREATE TABLE 02131_rptable (x UInt8) ENGINE = MergeTree ORDER BY x; +INSERT INTO 02131_rptable VALUES (1), (2), (3), (4); + +DROP ROW POLICY IF EXISTS 02131_filter_1 ON 02131_rptable; +DROP ROW POLICY IF EXISTS 02131_filter_2 ON 02131_rptable; +DROP ROW POLICY IF EXISTS 02131_filter_3 ON 02131_rptable; +DROP ROW POLICY IF EXISTS 02131_filter_4 ON 02131_rptable; +DROP ROW POLICY IF EXISTS 02131_filter_5 ON 02131_rptable; + +SELECT 'None'; +SELECT * FROM 02131_rptable; + +CREATE ROW POLICY 02131_filter_1 ON 02131_rptable USING x=1 AS permissive TO ALL; +SELECT 'R1: x == 1'; +SELECT * FROM 02131_rptable; + +CREATE ROW POLICY 02131_filter_2 ON 02131_rptable USING x=2 AS permissive TO ALL; +SELECT 'R1, R2: (x == 1) OR (x == 2)'; +SELECT * FROM 02131_rptable; + +CREATE ROW POLICY 02131_filter_3 ON 02131_rptable USING x=3 AS permissive TO ALL; +SELECT 'R1, R2, R3: (x == 1) OR (x == 2) OR (x == 3)'; +SELECT * FROM 02131_rptable; + +CREATE ROW POLICY 02131_filter_4 ON 02131_rptable USING x<=2 AS restrictive TO ALL; +SELECT 'R1, R2, R3, R4: ((x == 1) OR (x == 2) OR (x == 3)) AND (x <= 2)'; +SELECT * FROM 02131_rptable; + +CREATE ROW POLICY 02131_filter_5 ON 02131_rptable USING x>=2 AS restrictive TO ALL; +SELECT 'R1, R2, R3, R4, R5: ((x == 1) OR (x == 2) OR (x == 3)) AND (x <= 2) AND (x >= 2)'; +SELECT * FROM 02131_rptable; + +DROP ROW POLICY 02131_filter_1 ON 02131_rptable; +SELECT 'R2, R3, R4, R5: ((x == 2) OR (x == 3)) AND (x <= 2) AND (x >= 2)'; +SELECT * FROM 02131_rptable; + +DROP ROW POLICY 02131_filter_2 ON 02131_rptable; +SELECT 'R3, R4, R5: (x == 3) AND (x <= 2) AND (x >= 2)'; +SELECT * FROM 02131_rptable; + +DROP ROW POLICY 02131_filter_3 ON 02131_rptable; +SELECT 'R4, R5: (x <= 2) AND (x >= 2)'; +SELECT * FROM 02131_rptable; + +DROP ROW POLICY 02131_filter_4 ON 02131_rptable; +SELECT 'R5: (x >= 2)'; +SELECT * FROM 02131_rptable; + +DROP ROW POLICY 02131_filter_5 ON 02131_rptable; +SELECT 'None'; +SELECT * FROM 02131_rptable; + +DROP TABLE 02131_rptable; diff --git a/tests/queries/0_stateless/02165_replicated_grouping_sets.reference b/tests/queries/0_stateless/02165_replicated_grouping_sets.reference new file mode 100644 index 00000000000..659cd98368d --- /dev/null +++ b/tests/queries/0_stateless/02165_replicated_grouping_sets.reference @@ -0,0 +1,13 @@ +0 0 3 2 +0 1 5 2 +0 0 6 3 +0 2 7 2 +1 0 9 3 +0 0 6 4 +0 1 10 4 +0 0 12 6 +0 2 14 4 +1 0 18 6 +0 6 4 +1 10 4 +2 14 4 diff --git a/tests/queries/0_stateless/02165_replicated_grouping_sets.sql b/tests/queries/0_stateless/02165_replicated_grouping_sets.sql new file mode 100644 index 00000000000..d92d92c3e72 --- /dev/null +++ b/tests/queries/0_stateless/02165_replicated_grouping_sets.sql @@ -0,0 +1,45 @@ +SELECT + k1, + k2, + SUM(number) AS sum_value, + count() AS count_value +FROM numbers(6) +GROUP BY + GROUPING SETS + ( + (number % 2 AS k1), + (number % 3 AS k2) + ) +ORDER BY + sum_value ASC, + count_value ASC; + +SELECT + k1, + k2, + SUM(number) AS sum_value, + count() AS count_value +FROM remote('127.0.0.{2,3}', numbers(6)) +GROUP BY + GROUPING SETS + ( + (number % 2 AS k1), + (number % 3 AS k2) + ) +ORDER BY + sum_value ASC, + count_value ASC; + +SELECT + k2, + SUM(number) AS sum_value, + count() AS count_value +FROM remote('127.0.0.{2,3}', numbers(6)) +GROUP BY + GROUPING SETS + ( + (number % 3 AS k2) + ) +ORDER BY + sum_value ASC, + count_value ASC; diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache_log.reference b/tests/queries/0_stateless/02240_system_remote_filesystem_cache_log.reference index 9b0d13a1625..0a797d38d7e 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache_log.reference +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache_log.reference @@ -7,12 +7,11 @@ DROP TABLE IF EXISTS test; DROP TABLE IF EXISTS system.filesystem_cache_log; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100000); -SELECT * FROM test FORMAT Null; +SELECT 2240, * FROM test FORMAT Null; SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE read_type='READ_FROM_FS_AND_DOWNLOADED_TO_CACHE'; -(0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE +SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE read_type='READ_FROM_FS_AND_DOWNLOADED_TO_CACHE' AND query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2240%' AND current_database = currentDatabase() ORDER BY event_time desc LIMIT 1); (0,808110) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE -SELECT * FROM test FORMAT Null; +SELECT 2241, * FROM test FORMAT Null; SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE read_type='READ_FROM_CACHE'; +SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE read_type='READ_FROM_CACHE' AND query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2241%' AND current_database = currentDatabase() ORDER BY event_time desc LIMIT 1); (0,808110) READ_FROM_CACHE diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache_log.sql b/tests/queries/0_stateless/02240_system_remote_filesystem_cache_log.sql index 59e164d6f9f..871f9305c55 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache_log.sql +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache_log.sql @@ -11,10 +11,10 @@ DROP TABLE IF EXISTS system.filesystem_cache_log; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100000); -SELECT * FROM test FORMAT Null; +SELECT 2240, * FROM test FORMAT Null; SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE read_type='READ_FROM_FS_AND_DOWNLOADED_TO_CACHE'; +SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE read_type='READ_FROM_FS_AND_DOWNLOADED_TO_CACHE' AND query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2240%' AND current_database = currentDatabase() ORDER BY event_time desc LIMIT 1); -SELECT * FROM test FORMAT Null; +SELECT 2241, * FROM test FORMAT Null; SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE read_type='READ_FROM_CACHE'; +SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE read_type='READ_FROM_CACHE' AND query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2241%' AND current_database = currentDatabase() ORDER BY event_time desc LIMIT 1); diff --git a/tests/queries/0_stateless/02267_join_dup_columns_issue36199.reference b/tests/queries/0_stateless/02267_join_dup_columns_issue36199.reference index 47e37dc8fc8..c075b08e533 100644 --- a/tests/queries/0_stateless/02267_join_dup_columns_issue36199.reference +++ b/tests/queries/0_stateless/02267_join_dup_columns_issue36199.reference @@ -1,2 +1,14 @@ 2 1 2 +0 1 0 +0 1 1 0 +0 1 0 +0 0 1 0 +y 0 0 1 3 +\N 0 \N 10000000000 \N 2 1 2 +0 1 0 +0 1 1 0 +0 1 0 +0 0 1 0 +y 0 0 1 3 +\N 0 \N 10000000000 \N diff --git a/tests/queries/0_stateless/02267_join_dup_columns_issue36199.sql b/tests/queries/0_stateless/02267_join_dup_columns_issue36199.sql index 118d475207a..b51b3cc22e2 100644 --- a/tests/queries/0_stateless/02267_join_dup_columns_issue36199.sql +++ b/tests/queries/0_stateless/02267_join_dup_columns_issue36199.sql @@ -1,13 +1,22 @@ -set join_algorithm = 'hash'; +SET join_algorithm = 'hash'; -SELECT * -FROM ( SELECT 2 AS x ) AS t1 -RIGHT JOIN ( SELECT count('x'), count('y'), 2 AS x ) AS t2 -ON t1.x = t2.x; +SELECT * FROM ( SELECT 2 AS x ) AS t1 RIGHT JOIN ( SELECT count('x'), count('y'), 2 AS x ) AS t2 ON t1.x = t2.x; +SELECT * FROM ( SELECT 2 AS x ) as t1 RIGHT JOIN ( SELECT count('x'), count('y'), 0 AS x ) AS t2 ON t1.x = t2.x; +SELECT * FROM ( SELECT 2 AS x ) as t1 RIGHT JOIN ( SELECT count('x') :: Nullable(Int32), count('y'), 0 AS x ) AS t2 ON t1.x = t2.x; +SELECT * FROM ( SELECT 2 AS x ) as t1 RIGHT JOIN ( SELECT count('x') :: Nullable(Int32), count('y') :: Nullable(Int32), 0 AS x ) AS t2 ON t1.x = t2.x; +SELECT * FROM ( SELECT count('a'), count('b'), count('c'), 2 AS x ) as t1 RIGHT JOIN ( SELECT count('x'), count('y'), 0 AS x ) AS t2 ON t1.x = t2.x; -set join_algorithm = 'partial_merge'; +SELECT 'y', * FROM (SELECT count('y'), count('y'), 2 AS x) AS t1 RIGHT JOIN (SELECT count('x'), count('y'), 3 AS x) AS t2 ON t1.x = t2.x; +SELECT * FROM (SELECT arrayJoin([NULL]), 9223372036854775806, arrayJoin([NULL]), NULL AS x) AS t1 RIGHT JOIN (SELECT arrayJoin([arrayJoin([10000000000.])]), NULL AS x) AS t2 ON t1.x = t2.x; -SELECT * -FROM ( SELECT 2 AS x ) AS t1 -RIGHT JOIN ( SELECT count('x'), count('y'), 2 AS x ) AS t2 -ON t1.x = t2.x; +SET join_algorithm = 'partial_merge'; + +SELECT * FROM ( SELECT 2 AS x ) AS t1 RIGHT JOIN ( SELECT count('x'), count('y'), 2 AS x ) AS t2 ON t1.x = t2.x; +SELECT * FROM ( SELECT 2 AS x ) as t1 RIGHT JOIN ( SELECT count('x'), count('y'), 0 AS x ) AS t2 ON t1.x = t2.x; +SELECT * FROM ( SELECT 2 AS x ) as t1 RIGHT JOIN ( SELECT count('x') :: Nullable(Int32), count('y'), 0 AS x ) AS t2 ON t1.x = t2.x; +SELECT * FROM ( SELECT 2 AS x ) as t1 RIGHT JOIN ( SELECT count('x') :: Nullable(Int32), count('y') :: Nullable(Int32), 0 AS x ) AS t2 ON t1.x = t2.x; + +SELECT * FROM ( SELECT count('a'), count('b'), count('c'), 2 AS x ) as t1 RIGHT JOIN ( SELECT count('x'), count('y'), 0 AS x ) AS t2 ON t1.x = t2.x; + +SELECT 'y', * FROM (SELECT count('y'), count('y'), 2 AS x) AS t1 RIGHT JOIN (SELECT count('x'), count('y'), 3 AS x) AS t2 ON t1.x = t2.x; +SELECT * FROM (SELECT arrayJoin([NULL]), 9223372036854775806, arrayJoin([NULL]), NULL AS x) AS t1 RIGHT JOIN (SELECT arrayJoin([arrayJoin([10000000000.])]), NULL AS x) AS t2 ON t1.x = t2.x; diff --git a/tests/queries/0_stateless/02286_function_wyhash.reference b/tests/queries/0_stateless/02286_function_wyhash.reference new file mode 100644 index 00000000000..6b861deb81e --- /dev/null +++ b/tests/queries/0_stateless/02286_function_wyhash.reference @@ -0,0 +1,4 @@ +\N +4808886099364463827 +10557035923789874751 +10561902096955922022 diff --git a/tests/queries/0_stateless/02286_function_wyhash.sql b/tests/queries/0_stateless/02286_function_wyhash.sql new file mode 100644 index 00000000000..3307821b465 --- /dev/null +++ b/tests/queries/0_stateless/02286_function_wyhash.sql @@ -0,0 +1,5 @@ +SELECT wyHash64(NULL); +SELECT wyHash64(''); +SELECT wyHash64(' '); +SELECT wyHash64('qwerty'); + diff --git a/tests/queries/0_stateless/02293_arrow_dictionary_indexes.reference b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.reference new file mode 100644 index 00000000000..9ece9606f8b --- /dev/null +++ b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.reference @@ -0,0 +1,300 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 diff --git a/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql new file mode 100644 index 00000000000..3ff6a5ffbb3 --- /dev/null +++ b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql @@ -0,0 +1,3 @@ +-- Tags: no-fasttest +insert into function file(02293_data.arrow) select toLowCardinality(toString(number)) from numbers(300) settings output_format_arrow_low_cardinality_as_dictionary=1, engine_file_truncate_on_insert=1; +select * from file(02293_data.arrow); diff --git a/tests/queries/0_stateless/02293_ilike_on_fixed_strings.reference b/tests/queries/0_stateless/02293_ilike_on_fixed_strings.reference new file mode 100644 index 00000000000..5489ab3d7ce --- /dev/null +++ b/tests/queries/0_stateless/02293_ilike_on_fixed_strings.reference @@ -0,0 +1,2 @@ +AA 0 1 +Aa 1 1 diff --git a/tests/queries/0_stateless/02293_ilike_on_fixed_strings.sql b/tests/queries/0_stateless/02293_ilike_on_fixed_strings.sql new file mode 100644 index 00000000000..3838e372e24 --- /dev/null +++ b/tests/queries/0_stateless/02293_ilike_on_fixed_strings.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab (col FixedString(2)) engine = MergeTree() ORDER BY col; + +INSERT INTO tab VALUES ('AA') ('Aa'); + +SELECT col, col LIKE '%a', col ILIKE '%a' FROM tab WHERE col = 'AA'; +SELECT col, col LIKE '%a', col ILIKE '%a' FROM tab WHERE col = 'Aa'; + +DROP TABLE IF EXISTS tab; diff --git a/tests/queries/0_stateless/02293_optimize_aggregation_in_order_Array_functions.reference b/tests/queries/0_stateless/02293_optimize_aggregation_in_order_Array_functions.reference new file mode 100644 index 00000000000..c800527c196 --- /dev/null +++ b/tests/queries/0_stateless/02293_optimize_aggregation_in_order_Array_functions.reference @@ -0,0 +1 @@ +[0,1] [0,1] diff --git a/tests/queries/0_stateless/02293_optimize_aggregation_in_order_Array_functions.sql b/tests/queries/0_stateless/02293_optimize_aggregation_in_order_Array_functions.sql new file mode 100644 index 00000000000..2df7c9f2b8f --- /dev/null +++ b/tests/queries/0_stateless/02293_optimize_aggregation_in_order_Array_functions.sql @@ -0,0 +1,5 @@ +drop table if exists data_02293; +create table data_02293 (a Int64, grp_aggreg AggregateFunction(groupArrayArray, Array(UInt64)), grp_simple SimpleAggregateFunction(groupArrayArray, Array(UInt64))) engine = MergeTree() order by a; +insert into data_02293 select 1 a, groupArrayArrayState([toUInt64(number)]), groupArrayArray([toUInt64(number)]) from numbers(2) group by a; +SELECT arraySort(groupArrayArrayMerge(grp_aggreg)) gra , arraySort(groupArrayArray(grp_simple)) grs FROM data_02293 group by a SETTINGS optimize_aggregation_in_order=1; +drop table data_02293; diff --git a/tests/queries/0_stateless/02293_selected_rows_and_merges.reference b/tests/queries/0_stateless/02293_selected_rows_and_merges.reference new file mode 100644 index 00000000000..ad87f9ab5e5 --- /dev/null +++ b/tests/queries/0_stateless/02293_selected_rows_and_merges.reference @@ -0,0 +1,2 @@ +0 0 2000000 16000000 +0 0 0 0 diff --git a/tests/queries/0_stateless/02293_selected_rows_and_merges.sh b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh new file mode 100755 index 00000000000..33bfdd45810 --- /dev/null +++ b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +set -ue + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +query_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))") + +${CLICKHOUSE_CLIENT} -q "create table tt (x UInt32, y UInt32) engine = MergeTree order by x" +${CLICKHOUSE_CLIENT} -q "insert into tt select number, 0 from numbers(1e6)" +${CLICKHOUSE_CLIENT} -q "insert into tt select number, 1 from numbers(1e6)" + +${CLICKHOUSE_CLIENT} --optimize_throw_if_noop 1 -q "optimize table tt final" "--query_id=$query_id" + +# Here SelectRows and SelectBytes should be zero, MergedRows is 2m and MergedUncompressedBytes is 16m +${CLICKHOUSE_CLIENT} -q "system flush logs" +${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'], ProfileEvents['SelecteBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'optimize%' and current_database = currentDatabase()" + +${CLICKHOUSE_CLIENT} --mutations_sync 1 -q "alter table tt update y = y + 1 where 1" "--query_id=$query_id" +${CLICKHOUSE_CLIENT} -q "system flush logs" + +# Here for mutation all values are 0, cause mutation is executed async. +# It's pretty hard to write a test with total counter. +${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'] > 10, ProfileEvents['SelecteBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'alter%' and current_database = currentDatabase()" + + diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index 23bf74139af..7699e803f76 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -76,7 +76,7 @@ export CLICKHOUSE_PORT_POSTGRESQL=${CLICKHOUSE_PORT_POSTGRESQL:="9005"} export CLICKHOUSE_PORT_KEEPER=${CLICKHOUSE_PORT_KEEPER:=$(${CLICKHOUSE_EXTRACT_CONFIG} --try --key=keeper_server.tcp_port 2>/dev/null)} 2>/dev/null export CLICKHOUSE_PORT_KEEPER=${CLICKHOUSE_PORT_KEEPER:="9181"} -export CLICKHOUSE_CLIENT_SECURE=${CLICKHOUSE_CLIENT_SECURE:=$(echo "${CLICKHOUSE_CLIENT}" | sed 's/'"--port=${CLICKHOUSE_PORT_TCP}"'//g; s/$/'"--secure --port=${CLICKHOUSE_PORT_TCP_SECURE}"'/g')} +export CLICKHOUSE_CLIENT_SECURE=${CLICKHOUSE_CLIENT_SECURE:=$(echo "${CLICKHOUSE_CLIENT}" | sed 's/--secure //' | sed 's/'"--port=${CLICKHOUSE_PORT_TCP}"'//g; s/$/'"--secure --port=${CLICKHOUSE_PORT_TCP_SECURE}"'/g')} # Add database and log comment to url params if [ -v CLICKHOUSE_URL_PARAMS ] diff --git a/utils/changelog/README.md b/utils/changelog/README.md index cd8f8da9b61..8218af83d96 100644 --- a/utils/changelog/README.md +++ b/utils/changelog/README.md @@ -5,17 +5,14 @@ Generate github token: Dependencies: ``` -sudo apt-get install git curl jq python3 python3-fuzzywuzzy -``` - -Update information about tags: -``` -git fetch --tags +sudo apt-get update +sudo apt-get install git python3 python3-fuzzywuzzy python3-github +python3 changelog.py -h ``` Usage example: ``` -export GITHUB_USER=... GITHUB_TOKEN=ghp_... -./changelog.sh v21.5.6.6-stable v21.6.2.7-prestable +python3 changelog.py --output=changelog-v22.4.1.2305-prestable.md --gh-user-or-token="$GITHUB_TOKEN" v21.6.2.7-prestable +python3 changelog.py --output=changelog-v22.4.1.2305-prestable.md --gh-user-or-token="$USER" --gh-password="$PASSWORD" v21.6.2.7-prestable ``` diff --git a/utils/changelog/changelog.py b/utils/changelog/changelog.py new file mode 100755 index 00000000000..a846c240055 --- /dev/null +++ b/utils/changelog/changelog.py @@ -0,0 +1,379 @@ +#!/usr/bin/env python3 +# In our CI this script runs in style-test containers + +import argparse +import logging +import re +from datetime import date, timedelta +from queue import Empty, Queue +from subprocess import CalledProcessError, DEVNULL +from threading import Thread +from typing import Dict, List, Optional, TextIO + +from fuzzywuzzy.fuzz import ratio # type: ignore +from github import Github +from github.NamedUser import NamedUser +from github.PullRequest import PullRequest +from github.Repository import Repository +from git_helper import is_shallow, git_runner as runner + +# This array gives the preferred category order, and is also used to +# normalize category names. +categories_preferred_order = ( + "Backward Incompatible Change", + "New Feature", + "Performance Improvement", + "Improvement", + "Bug Fix", + "Build/Testing/Packaging Improvement", + "Other", +) + +FROM_REF = "" +TO_REF = "" + + +class Description: + def __init__( + self, number: int, user: NamedUser, html_url: str, entry: str, category: str + ): + self.number = number + self.html_url = html_url + self.user = user + self.entry = entry + self.category = category + + @property + def formatted_entry(self) -> str: + # Substitute issue links. + # 1) issue number w/o markdown link + entry = re.sub( + r"([^[])#([0-9]{4,})", + r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", + self.entry, + ) + # 2) issue URL w/o markdown link + entry = re.sub( + r"([^(])https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})", + r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", + entry, + ) + user_name = self.user.name if self.user.name else self.user.login + return ( + f"* {entry} [#{self.number}]({self.html_url}) " + f"([{user_name}]({self.user.html_url}))." + ) + + # Sort PR descriptions by numbers + def __eq__(self, other) -> bool: + if not isinstance(self, type(other)): + return NotImplemented + return self.number == other.number + + def __lt__(self, other: "Description") -> bool: + return self.number < other.number + + +class Worker(Thread): + def __init__(self, request_queue: Queue, repo: Repository): + Thread.__init__(self) + self.queue = request_queue + self.repo = repo + self.response = [] # type: List[Description] + + def run(self): + while not self.queue.empty(): + try: + number = self.queue.get() + except Empty: + break # possible race condition, just continue + api_pr = self.repo.get_pull(number) + in_changelog = False + merge_commit = api_pr.merge_commit_sha + try: + runner.run(f"git rev-parse '{merge_commit}'") + except CalledProcessError: + # It's possible that commit not in the repo, just continue + logging.info("PR %s does not belong to the repo", api_pr.number) + continue + + try: + runner.run( + f"git merge-base --is-ancestor '{merge_commit}' '{TO_REF}'", + stderr=DEVNULL, + ) + runner.run( + f"git merge-base --is-ancestor '{FROM_REF}' '{merge_commit}'", + stderr=DEVNULL, + ) + in_changelog = True + except CalledProcessError: + # Commit is not between from and to refs + continue + if in_changelog: + desc = generate_description(api_pr, self.repo) + if desc is not None: + self.response.append(desc) + + self.queue.task_done() + + +def get_descriptions( + repo: Repository, numbers: List[int], jobs: int +) -> Dict[str, List[Description]]: + workers = [] # type: List[Worker] + queue = Queue() # type: Queue # (!?!?!?!??!) + for number in numbers: + queue.put(number) + for _ in range(jobs): + worker = Worker(queue, repo) + worker.start() + workers.append(worker) + + descriptions = {} # type: Dict[str, List[Description]] + for worker in workers: + worker.join() + for desc in worker.response: + if desc.category not in descriptions: + descriptions[desc.category] = [] + descriptions[desc.category].append(desc) + + for descs in descriptions.values(): + descs.sort() + + return descriptions + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Generate a changelog in MD format between given tags. " + "It fetches all tags and unshallow the git repositore automatically", + ) + parser.add_argument( + "-v", + "--verbose", + action="count", + default=0, + help="set the script verbosity, could be used multiple", + ) + parser.add_argument( + "--output", + type=argparse.FileType("w"), + default="-", + help="output file for changelog", + ) + parser.add_argument( + "--repo", + default="ClickHouse/ClickHouse", + help="a repository to query for pull-requests from GitHub", + ) + parser.add_argument( + "--jobs", + type=int, + default=10, + help="number of jobs to get pull-requests info from GitHub API", + ) + parser.add_argument( + "--gh-user-or-token", + help="user name or GH token to authenticate", + ) + parser.add_argument( + "--gh-password", + help="a password that should be used when user is given", + ) + parser.add_argument( + "--from", + dest="from_ref", + help="git ref for a starting point of changelog, by default is calculated " + "automatically to match a previous tag in history", + ) + parser.add_argument( + "to_ref", + metavar="TO_REF", + help="git ref for the changelog end", + ) + args = parser.parse_args() + return args + + +# This function mirrors the PR description checks in ClickhousePullRequestTrigger. +# Returns False if the PR should not be mentioned changelog. +def generate_description(item: PullRequest, repo: Repository) -> Optional[Description]: + backport_number = item.number + if item.head.ref.startswith("backport/"): + branch_parts = item.head.ref.split("/") + if len(branch_parts) == 3: + item = repo.get_pull(int(branch_parts[-1])) + else: + logging.warning( + "The branch %s doesn't match backport template, using PR %s as is", + item.head.ref, + item.number, + ) + description = item.body + # Don't skip empty lines because they delimit parts of description + lines = [x.strip() for x in (description.split("\n") if description else [])] + lines = [re.sub(r"\s+", " ", ln) for ln in lines] + + category = "" + entry = "" + + if lines: + i = 0 + while i < len(lines): + if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]): + i += 1 + if i >= len(lines): + break + # Can have one empty line between header and the category itself. + # Filter it out. + if not lines[i]: + i += 1 + if i >= len(lines): + break + category = re.sub(r"^[-*\s]*", "", lines[i]) + i += 1 + elif re.match( + r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] + ): + i += 1 + # Can have one empty line between header and the entry itself. + # Filter it out. + if i < len(lines) and not lines[i]: + i += 1 + # All following lines until empty one are the changelog entry. + entry_lines = [] + while i < len(lines) and lines[i]: + entry_lines.append(lines[i]) + i += 1 + entry = " ".join(entry_lines) + else: + i += 1 + + if not category: + # Shouldn't happen, because description check in CI should catch such PRs. + # Fall through, so that it shows up in output and the user can fix it. + category = "NO CL CATEGORY" + + # Filter out the PR categories that are not for changelog. + if re.match( + r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", + category, + ): + return None + + if backport_number != item.number: + entry = f"Backported in #{backport_number}: {entry}" + + if not entry: + # Shouldn't happen, because description check in CI should catch such PRs. + category = "NO CL ENTRY" + entry = "NO CL ENTRY: '" + item.title + "'" + + entry = entry.strip() + if entry[-1] != ".": + entry += "." + + for c in categories_preferred_order: + if ratio(category.lower(), c.lower()) >= 90: + category = c + break + + return Description(item.number, item.user, item.html_url, entry, category) + + +def write_changelog(fd: TextIO, descriptions: Dict[str, List[Description]]): + fd.write(f"### ClickHouse release {TO_REF} FIXME as compared to {FROM_REF}\n\n") + + seen_categories = [] # type: List[str] + for category in categories_preferred_order: + if category in descriptions: + seen_categories.append(category) + fd.write(f"#### {category}\n") + for desc in descriptions[category]: + fd.write(f"{desc.formatted_entry}\n") + + fd.write("\n") + + for category in descriptions: + if category not in seen_categories: + fd.write(f"#### {category}\n\n") + for desc in descriptions[category]: + fd.write(f"{desc.formatted_entry}\n") + + fd.write("\n") + + +def check_refs(from_ref: Optional[str], to_ref: str): + global FROM_REF, TO_REF + TO_REF = to_ref + + # Check TO_REF + runner.run(f"git rev-parse {TO_REF}") + + # Check from_ref + if from_ref is None: + FROM_REF = runner.run(f"git describe --abbrev=0 --tags '{TO_REF}~'") + # Check if the previsous tag is different for merge commits + # I __assume__ we won't have octopus merges, at least for the tagged commits + try: + alternative_tag = runner.run( + f"git describe --abbrev=0 --tags '{TO_REF}^2'", stderr=DEVNULL + ) + if FROM_REF != alternative_tag: + raise Exception( + f"Unable to get unified parent tag for {TO_REF}, " + f"define it manually, get {FROM_REF} and {alternative_tag}" + ) + except CalledProcessError: + pass + else: + runner.run(f"git rev-parse {FROM_REF}") + FROM_REF = from_ref + + +def main(): + log_levels = [logging.CRITICAL, logging.WARN, logging.INFO, logging.DEBUG] + args = parse_args() + logging.basicConfig( + format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d]:\n%(message)s", + level=log_levels[min(args.verbose, 3)], + ) + # Get the full repo + if is_shallow(): + logging.info("Unshallow repository") + runner.run("git fetch --unshallow", stderr=DEVNULL) + logging.info("Fetching all tags") + runner.run("git fetch --tags", stderr=DEVNULL) + + check_refs(args.from_ref, args.to_ref) + + logging.info("Using %s..%s as changelog interval", FROM_REF, TO_REF) + + # Get starting and ending dates for gathering PRs + # Add one day after and before to mitigate TZ possible issues + # `tag^{}` format gives commit ref when we have annotated tags + from_date = runner.run(f"git log -1 --format=format:%as '{FROM_REF}^{{}}'") + from_date = (date.fromisoformat(from_date) - timedelta(1)).isoformat() + to_date = runner.run(f"git log -1 --format=format:%as '{TO_REF}^{{}}'") + to_date = (date.fromisoformat(to_date) + timedelta(1)).isoformat() + + # Get all PRs for the given time frame + gh = Github( + args.gh_user_or_token, args.gh_password, per_page=100, pool_size=args.jobs + ) + query = f"type:pr repo:{args.repo} is:merged merged:{from_date}..{to_date}" + repo = gh.get_repo(args.repo) + api_prs = gh.search_issues(query=query, sort="created") + logging.info("Found %s PRs for the query: '%s'", api_prs.totalCount, query) + + pr_numbers = [pr.number for pr in api_prs] + + descriptions = get_descriptions(repo, pr_numbers, args.jobs) + + write_changelog(args.output, descriptions) + + +if __name__ == "__main__": + main() diff --git a/utils/changelog/changelog.sh b/utils/changelog/changelog.sh deleted file mode 100755 index 52817acfae4..00000000000 --- a/utils/changelog/changelog.sh +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/bash -set -e - -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -from="$1" -to="$2" -log_command=(git log "$from..$to" --first-parent) - -"${log_command[@]}" > "changelog-log.txt" - -# Check for diamond merges. -if "${log_command[@]}" --oneline --grep "Merge branch '" | grep '' -then - # DO NOT ADD automated handling of diamond merges to this script. - # It is an unsustainable way to work with git, and it MUST be visible. - echo Warning: suspected diamond merges above. - echo Some commits will be missed, review these manually. -fi - -# Search for PR numbers in commit messages. First variant is normal merge, and second -# variant is squashed. Next are some backport message variants. -find_prs=(sed -n "s/^.*merg[eding]*.*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*(#\([[:digit:]]\+\))$/\1/p; - s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip") - -# awk is to filter out small task numbers from different task tracker, which are -# referenced by documentation commits like '* DOCSUP-824: query log (#115)'. -"${find_prs[@]}" "changelog-log.txt" | sort -rn | uniq | awk '$0 > 1000 { print $0 }' > "changelog-prs.txt" - -echo "$(wc -l < "changelog-prs.txt") PRs added between $from and $to." -if [ $(wc -l < "changelog-prs.txt") -eq 0 ] ; then exit 0 ; fi - -function github_download() -{ - local url=${1} - local file=${2} - if ! [ -f "$file" ] - then - echo "curl -u \"$GITHUB_USER:***\" -sSf \"$url\" > \"$file\"" - - if ! curl -u "$GITHUB_USER:$GITHUB_TOKEN" \ - -sSf "$url" \ - > "$file" - then - >&2 echo "Failed to download '$url' to '$file'. Contents: '$(cat "$file")'." - rm "$file" - return 1 - fi - sleep 0.1 - fi -} - -rm changelog-prs-filtered.txt &> /dev/null ||: -for pr in $(cat "changelog-prs.txt") -do - # Download PR info from github. - file="pr$pr.json" - github_download "https://api.github.com/repos/ClickHouse/ClickHouse/pulls/$pr" "$file" || continue - - if ! [ "$pr" == "$(jq -r .number "$file")" ] - then - >&2 echo "Got wrong data for PR #$pr (please check and remove '$file')." - continue - fi - - # Filter out PRs by bots. - user_login=$(jq -r .user.login "$file") - - filter_bot=$(echo "$user_login" | grep -q "\[bot\]$" && echo "Skip." || echo "Ok." ||:) - filter_robot=$(echo "$user_login" | grep -q "robot-clickhouse" && echo "Skip." || echo "Ok." ||:) - - if [ "Skip." == "$filter_robot" ] || [ "Skip." == "$filter_bot" ] - then - continue - fi - - # Download author info from github. - user_id=$(jq -r .user.id "$file") - user_file="user$user_id.json" - github_download "$(jq -r .user.url "$file")" "$user_file" || continue - - if ! [ "$user_id" == "$(jq -r .id "$user_file")" ] - then - >&2 echo "Got wrong data for user #$user_id (please check and remove '$user_file')." - continue - fi - - echo "$pr" >> changelog-prs-filtered.txt -done - -echo "### ClickHouse release $to FIXME as compared to $from -" > changelog.md -"$script_dir/format-changelog.py" changelog-prs-filtered.txt >> changelog.md -cat changelog.md diff --git a/utils/changelog/format-changelog.py b/utils/changelog/format-changelog.py deleted file mode 100755 index ef1340d48dd..00000000000 --- a/utils/changelog/format-changelog.py +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/python3 - -import argparse -import collections -import fuzzywuzzy.fuzz -import itertools -import json -import os -import re -import sys - -parser = argparse.ArgumentParser(description="Format changelog for given PRs.") -parser.add_argument( - "file", - metavar="FILE", - type=argparse.FileType("r", encoding="utf-8"), - nargs="?", - default=sys.stdin, - help="File with PR numbers, one per line.", -) -args = parser.parse_args() - -# This function mirrors the PR description checks in ClickhousePullRequestTrigger. -# Returns False if the PR should not be mentioned changelog. -def parse_one_pull_request(item): - description = item["body"] - # Don't skip empty lines because they delimit parts of description - lines = [ - line - for line in [ - x.strip() for x in (description.split("\n") if description else []) - ] - ] - lines = [re.sub(r"\s+", " ", l) for l in lines] - - category = "" - entry = "" - - if lines: - i = 0 - while i < len(lines): - if re.match(r"(?i)^[>*_ ]*change\s*log\s*category", lines[i]): - i += 1 - if i >= len(lines): - break - # Can have one empty line between header and the category itself. Filter it out. - if not lines[i]: - i += 1 - if i >= len(lines): - break - category = re.sub(r"^[-*\s]*", "", lines[i]) - i += 1 - elif re.match( - r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] - ): - i += 1 - # Can have one empty line between header and the entry itself. Filter it out. - if i < len(lines) and not lines[i]: - i += 1 - # All following lines until empty one are the changelog entry. - entry_lines = [] - while i < len(lines) and lines[i]: - entry_lines.append(lines[i]) - i += 1 - entry = " ".join(entry_lines) - else: - i += 1 - - if not category: - # Shouldn't happen, because description check in CI should catch such PRs. - # Fall through, so that it shows up in output and the user can fix it. - category = "NO CL CATEGORY" - - # Filter out the PR categories that are not for changelog. - if re.match( - r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", - category, - ): - return False - - if not entry: - # Shouldn't happen, because description check in CI should catch such PRs. - category = "NO CL ENTRY" - entry = "NO CL ENTRY: '" + item["title"] + "'" - - entry = entry.strip() - if entry[-1] != ".": - entry += "." - - item["entry"] = entry - item["category"] = category - - return True - - -# This array gives the preferred category order, and is also used to -# normalize category names. -categories_preferred_order = [ - "Backward Incompatible Change", - "New Feature", - "Performance Improvement", - "Improvement", - "Bug Fix", - "Build/Testing/Packaging Improvement", - "Other", -] - -category_to_pr = collections.defaultdict(lambda: []) -users = {} -for line in args.file: - pr = json.loads(open(f"pr{line.strip()}.json").read()) - assert pr["number"] - if not parse_one_pull_request(pr): - continue - - assert pr["category"] - - # Normalize category name - for c in categories_preferred_order: - if fuzzywuzzy.fuzz.ratio(pr["category"].lower(), c.lower()) >= 90: - pr["category"] = c - break - - category_to_pr[pr["category"]].append(pr) - user_id = pr["user"]["id"] - users[user_id] = json.loads(open(f"user{user_id}.json").read()) - - -def print_category(category): - print(("#### " + category)) - print() - for pr in category_to_pr[category]: - user = users[pr["user"]["id"]] - user_name = user["name"] if user["name"] else user["login"] - - # Substitute issue links. - # 1) issue number w/o markdown link - pr["entry"] = re.sub( - r"([^[])#([0-9]{4,})", - r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", - pr["entry"], - ) - # 2) issue URL w/o markdown link - pr["entry"] = re.sub( - r"([^(])https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})", - r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", - pr["entry"], - ) - - print( - f'* {pr["entry"]} [#{pr["number"]}]({pr["html_url"]}) ([{user_name}]({user["html_url"]})).' - ) - - print() - - -# Print categories in preferred order -for category in categories_preferred_order: - if category in category_to_pr: - print_category(category) - category_to_pr.pop(category) - -# Print the rest of the categories -for category in category_to_pr: - print_category(category) diff --git a/utils/changelog/git_helper.py b/utils/changelog/git_helper.py new file mode 120000 index 00000000000..03b05a7eddd --- /dev/null +++ b/utils/changelog/git_helper.py @@ -0,0 +1 @@ +../../tests/ci/git_helper.py \ No newline at end of file diff --git a/utils/changelog/requirements.txt b/utils/changelog/requirements.txt new file mode 100644 index 00000000000..106e9e2c72d --- /dev/null +++ b/utils/changelog/requirements.txt @@ -0,0 +1,3 @@ +fuzzywuzzy +PyGitHub +python-Levenshtein diff --git a/website/benchmark/hardware/results/powerpc.json b/website/benchmark/hardware/results/powerpc.json new file mode 100644 index 00000000000..f6701126697 --- /dev/null +++ b/website/benchmark/hardware/results/powerpc.json @@ -0,0 +1,54 @@ +[ + { + "system": "POWER9 x_large VM", + "system_full": "POWER9 x_large VM (8 vCPU, pSeries, 16GiB RAM)", + "time": "2022-05-10 00:00:00", + "kind": "cloud", + "result": + [ +[0.263, 0.004, 0.004], +[0.352, 0.044, 0.040], +[1.556, 0.087, 0.083], +[2.341, 0.129, 0.120], +[0.490, 0.264, 0.258], +[4.565, 1.059, 1.038], +[0.042, 0.004, 0.004], +[0.132, 0.057, 0.062], +[3.168, 0.915, 0.917], +[2.020, 1.180, 1.183], +[1.510, 0.430, 0.435], +[1.525, 0.552, 0.607], +[3.104, 1.500, 1.529], +[2.459, 1.995, 1.975], +[5.291, 1.847, 1.849], +[2.552, 1.842, 1.920], +[7.378, 6.412, 6.283], +[3.881, 3.592, 3.552], +[13.757, 10.763, 10.880], +[0.504, 0.156, 0.134], +[16.285, 1.777, 1.770], +[5.135, 2.305, 2.242], +[21.215, 5.129, 5.183], +[35.624, 2.645, 2.476], +[2.648, 0.676, 0.715], +[0.815, 0.576, 0.568], +[1.365, 0.746, 0.692], +[4.857, 1.922, 1.898], +[14.380, 5.401, 5.445], +[4.555, 4.535, 4.371], +[4.095, 1.366, 1.349], +[7.474, 1.816, 1.859], +[14.235, 13.406, 13.854], +[14.165, 9.465, 9.715], +[11.313, 8.640, 8.462], +[3.605, 3.009, 2.813], +[0.662, 0.394, 0.391], +[0.350, 0.196, 0.194], +[0.217, 0.150, 0.145], +[1.025, 0.918, 0.895], +[0.254, 0.057, 0.057], +[0.132, 0.045, 0.046], +[0.047, 0.015, 0.012] + ] + } +]