diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index f4cbcbf1790..9a96ccc9906 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -362,50 +362,50 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - BuilderBinGCC: - needs: [DockerHubPush] - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/build_check - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) - BUILD_NAME=binary_gcc - EOF - - name: Download changed images - uses: actions/download-artifact@v2 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Build - run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - - name: Upload build URLs to artifacts - if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 - with: - name: ${{ env.BUILD_URLS }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - - name: Cleanup - if: always() - run: | - docker kill "$(docker ps -q)" ||: - docker rm -f "$(docker ps -a -q)" ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + # BuilderBinGCC: + # needs: [DockerHubPush] + # runs-on: [self-hosted, builder] + # steps: + # - name: Set envs + # run: | + # cat >> "$GITHUB_ENV" << 'EOF' + # TEMP_PATH=${{runner.temp}}/build_check + # IMAGES_PATH=${{runner.temp}}/images_path + # 
REPO_COPY=${{runner.temp}}/build_check/ClickHouse + # CACHES_PATH=${{runner.temp}}/../ccaches + # CHECK_NAME=ClickHouse build check (actions) + # BUILD_NAME=binary_gcc + # EOF + # - name: Download changed images + # uses: actions/download-artifact@v2 + # with: + # name: changed_images + # path: ${{ env.IMAGES_PATH }} + # - name: Clear repository + # run: | + # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + # - name: Check out repository code + # uses: actions/checkout@v2 + # - name: Build + # run: | + # git -C "$GITHUB_WORKSPACE" submodule sync --recursive + # git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + # sudo rm -fr "$TEMP_PATH" + # mkdir -p "$TEMP_PATH" + # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + # cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + # - name: Upload build URLs to artifacts + # if: ${{ success() || failure() }} + # uses: actions/upload-artifact@v2 + # with: + # name: ${{ env.BUILD_URLS }} + # path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + # - name: Cleanup + # if: always() + # run: | + # docker kill "$(docker ps -q)" ||: + # docker rm -f "$(docker ps -a -q)" ||: + # sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebAsan: needs: [DockerHubPush] runs-on: [self-hosted, builder] @@ -1030,7 +1030,7 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - - BuilderBinGCC + # - BuilderBinGCC - BuilderBinPPC64 - BuilderBinTidy - BuilderDebSplitted @@ -2685,40 +2685,40 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" - UnitTestsReleaseGCC: - needs: [BuilderBinGCC] - runs-on: [self-hosted, fuzzer-unit-tester] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/unit_tests_asan - REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Unit tests (release-gcc, actions) - REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse - EOF - - name: 
Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Unit test - run: | - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" - python3 unit_tests_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill "$(docker ps -q)" ||: - docker rm -f "$(docker ps -a -q)" ||: - sudo rm -fr "$TEMP_PATH" + # UnitTestsReleaseGCC: + # needs: [BuilderBinGCC] + # runs-on: [self-hosted, fuzzer-unit-tester] + # steps: + # - name: Set envs + # run: | + # cat >> "$GITHUB_ENV" << 'EOF' + # TEMP_PATH=${{runner.temp}}/unit_tests_asan + # REPORTS_PATH=${{runner.temp}}/reports_dir + # CHECK_NAME=Unit tests (release-gcc, actions) + # REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse + # EOF + # - name: Download json reports + # uses: actions/download-artifact@v2 + # with: + # path: ${{ env.REPORTS_PATH }} + # - name: Clear repository + # run: | + # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + # - name: Check out repository code + # uses: actions/checkout@v2 + # - name: Unit test + # run: | + # sudo rm -fr "$TEMP_PATH" + # mkdir -p "$TEMP_PATH" + # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + # cd "$REPO_COPY/tests/ci" + # python3 unit_tests_check.py "$CHECK_NAME" + # - name: Cleanup + # if: always() + # run: | + # docker kill "$(docker ps -q)" ||: + # docker rm -f "$(docker ps -a -q)" ||: + # sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index d93b17cbe79..c1ae4798716 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -375,50 +375,50 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f 
"$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - BuilderBinGCC: - needs: [DockerHubPush, FastTest] - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/build_check - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) - BUILD_NAME=binary_gcc - EOF - - name: Download changed images - uses: actions/download-artifact@v2 - with: - name: changed_images - path: ${{ runner.temp }}/images_path - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Build - run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - - name: Upload build URLs to artifacts - if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 - with: - name: ${{ env.BUILD_URLS }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - - name: Cleanup - if: always() - run: | - docker kill "$(docker ps -q)" ||: - docker rm -f "$(docker ps -a -q)" ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + # BuilderBinGCC: + # needs: [DockerHubPush, FastTest] + # runs-on: [self-hosted, builder] + # steps: + # - name: Set envs + # run: | + # cat >> "$GITHUB_ENV" << 'EOF' + # TEMP_PATH=${{runner.temp}}/build_check + # IMAGES_PATH=${{runner.temp}}/images_path + # REPO_COPY=${{runner.temp}}/build_check/ClickHouse + # CACHES_PATH=${{runner.temp}}/../ccaches + # CHECK_NAME=ClickHouse build check (actions) + # BUILD_NAME=binary_gcc + # EOF + # - name: Download changed images + # uses: actions/download-artifact@v2 + # 
with: + # name: changed_images + # path: ${{ runner.temp }}/images_path + # - name: Clear repository + # run: | + # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + # - name: Check out repository code + # uses: actions/checkout@v2 + # - name: Build + # run: | + # git -C "$GITHUB_WORKSPACE" submodule sync --recursive + # git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + # sudo rm -fr "$TEMP_PATH" + # mkdir -p "$TEMP_PATH" + # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + # cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + # - name: Upload build URLs to artifacts + # if: ${{ success() || failure() }} + # uses: actions/upload-artifact@v2 + # with: + # name: ${{ env.BUILD_URLS }} + # path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + # - name: Cleanup + # if: always() + # run: | + # docker kill "$(docker ps -q)" ||: + # docker rm -f "$(docker ps -a -q)" ||: + # sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebAarch64: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -1077,7 +1077,7 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - - BuilderBinGCC + # - BuilderBinGCC - BuilderBinPPC64 - BuilderBinTidy - BuilderDebSplitted @@ -2886,40 +2886,40 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" - UnitTestsReleaseGCC: - needs: [BuilderBinGCC] - runs-on: [self-hosted, fuzzer-unit-tester] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/unit_tests_asan - REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Unit tests (release-gcc, actions) - REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse - EOF - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: 
actions/checkout@v2 - - name: Unit test - run: | - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" - python3 unit_tests_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill "$(docker ps -q)" ||: - docker rm -f "$(docker ps -a -q)" ||: - sudo rm -fr "$TEMP_PATH" + # UnitTestsReleaseGCC: + # needs: [BuilderBinGCC] + # runs-on: [self-hosted, fuzzer-unit-tester] + # steps: + # - name: Set envs + # run: | + # cat >> "$GITHUB_ENV" << 'EOF' + # TEMP_PATH=${{runner.temp}}/unit_tests_asan + # REPORTS_PATH=${{runner.temp}}/reports_dir + # CHECK_NAME=Unit tests (release-gcc, actions) + # REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse + # EOF + # - name: Download json reports + # uses: actions/download-artifact@v2 + # with: + # path: ${{ env.REPORTS_PATH }} + # - name: Clear repository + # run: | + # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + # - name: Check out repository code + # uses: actions/checkout@v2 + # - name: Unit test + # run: | + # sudo rm -fr "$TEMP_PATH" + # mkdir -p "$TEMP_PATH" + # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + # cd "$REPO_COPY/tests/ci" + # python3 unit_tests_check.py "$CHECK_NAME" + # - name: Cleanup + # if: always() + # run: | + # docker kill "$(docker ps -q)" ||: + # docker rm -f "$(docker ps -a -q)" ||: + # sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml index 7a7eddf444d..9c55c619039 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -3,9 +3,14 @@ name: TagsStableWorkflow # - Sends it to JFROG Artifactory # - Adds them to the release assets +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + on: # yamllint disable-line rule:truthy push: tags: + - 'v*-prestable' - 'v*-stable' - 'v*-lts' @@ -15,26 +20,36 @@ jobs: 
runs-on: [self-hosted, style-checker] steps: - name: Get tag name - run: echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" + run: | + echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" - name: Check out repository code uses: actions/checkout@v2 with: ref: master + fetch-depth: 0 - name: Generate versions + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - git fetch --tags ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv + GID=$(id -g "${UID}") + docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 \ + --volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \ + /ClickHouse/utils/changelog/changelog.py -vv --gh-user-or-token="$GITHUB_TOKEN" \ + --output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" --jobs=5 "${GITHUB_TAG}" + git add "./docs/changelogs/${GITHUB_TAG}.md" + git diff HEAD - name: Create Pull Request uses: peter-evans/create-pull-request@v3 with: author: "robot-clickhouse " committer: "robot-clickhouse " - commit-message: Update version_date.tsv after ${{ env.GITHUB_TAG }} + commit-message: Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }} branch: auto/${{ env.GITHUB_TAG }} delete-branch: true - title: Update version_date.tsv after ${{ env.GITHUB_TAG }} + title: Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }} body: | - Update version_date.tsv after ${{ env.GITHUB_TAG }} + Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }} - Changelog category (leave one): + ### Changelog category (leave one): - Not for changelog (changelog entry is not required) diff --git a/.gitmodules b/.gitmodules index 6c9e66f9cbc..5fd9e9721f6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -262,3 +262,6 @@ [submodule "contrib/minizip-ng"] path = contrib/minizip-ng url = https://github.com/zlib-ng/minizip-ng +[submodule "contrib/wyhash"] + path = contrib/wyhash + url = https://github.com/wangyi-fudan/wyhash.git diff --git a/contrib/CMakeLists.txt 
b/contrib/CMakeLists.txt index 1f03c0fd341..94003f3b3ee 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -94,6 +94,7 @@ if (ENABLE_FUZZING) add_contrib (libprotobuf-mutator-cmake libprotobuf-mutator) endif() +add_contrib (wyhash-cmake wyhash) add_contrib (cityhash102) add_contrib (libfarmhash) add_contrib (icu-cmake icu) diff --git a/contrib/wyhash b/contrib/wyhash new file mode 160000 index 00000000000..991aa3dab62 --- /dev/null +++ b/contrib/wyhash @@ -0,0 +1 @@ +Subproject commit 991aa3dab624e50b066f7a02ccc9f6935cc740ec diff --git a/contrib/wyhash-cmake/CMakeLists.txt b/contrib/wyhash-cmake/CMakeLists.txt new file mode 100644 index 00000000000..679346d601a --- /dev/null +++ b/contrib/wyhash-cmake/CMakeLists.txt @@ -0,0 +1,3 @@ +add_library(wyhash INTERFACE) +target_include_directories(wyhash SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/wyhash") +add_library(ch_contrib::wyhash ALIAS wyhash) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index d9a5bb23a80..c547ae03a52 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -176,6 +176,7 @@ function clone_submodules contrib/NuRaft contrib/jemalloc contrib/replxx + contrib/wyhash ) git submodule sync diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index 34414abc3f5..0cb25d12a9f 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -28,8 +28,11 @@ done set -e # cleanup for retry run if volume is not recreated -docker kill "$(docker ps -aq)" || true -docker rm "$(docker ps -aq)" || true +# shellcheck disable=SC2046 +{ + docker kill $(docker ps -aq) || true + docker rm $(docker ps -aq) || true +} echo "Start tests" export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index c05dab05f67..97b9225a2d2 100755 --- 
a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -203,6 +203,10 @@ clickhouse-client --query "SHOW TABLES FROM test" stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log +# NOTE Disable thread fuzzer before server start with data after stress test. +# In debug build it can take a lot of time. +unset "${!THREAD_@}" + start clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \ diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 3101ab84c40..427010dbe37 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -13,6 +13,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ libxml2-utils \ moreutils \ pylint \ + python3-fuzzywuzzy \ python3-pip \ shellcheck \ yamllint \ diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index 651883511e8..67870eb3234 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -18,3 +18,6 @@ echo "Check workflows" | ts echo "Check shell scripts with shellcheck" | ts ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +echo "Check help for changelog generator works" | ts +cd ../changelog || exit 1 +./changelog.py -h 2>/dev/null 1>&2 diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index dadda55c830..687c03697d7 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -12,7 +12,7 @@ UNKNOWN_SIGN = "[ UNKNOWN " SKIPPED_SIGN = "[ SKIPPED " HUNG_SIGN = "Found hung queries in processlist" -NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"] +SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"] RETRIES_SIGN = "Some tests were 
restarted" @@ -25,14 +25,14 @@ def process_test_log(log_path): success = 0 hung = False retries = False - task_timeout = True + success_finish = False test_results = [] with open(log_path, "r") as test_file: for line in test_file: original_line = line line = line.strip() - if any(s in line for s in NO_TASK_TIMEOUT_SIGNS): - task_timeout = False + if any(s in line for s in SUCCESS_FINISH_SIGNS): + success_finish = True if HUNG_SIGN in line: hung = True if RETRIES_SIGN in line: @@ -81,7 +81,7 @@ def process_test_log(log_path): failed, success, hung, - task_timeout, + success_finish, retries, test_results, ) @@ -108,7 +108,7 @@ def process_result(result_path): failed, success, hung, - task_timeout, + success_finish, retries, test_results, ) = process_test_log(result_path) @@ -123,10 +123,10 @@ def process_result(result_path): description = "Some queries hung, " state = "failure" test_results.append(("Some queries hung", "FAIL", "0", "")) - elif task_timeout: - description = "Timeout, " + elif not success_finish: + description = "Tests are not finished, " state = "failure" - test_results.append(("Timeout", "FAIL", "0", "")) + test_results.append(("Tests are not finished", "FAIL", "0", "")) elif retries: description = "Some tests restarted, " test_results.append(("Some tests restarted", "SKIPPED", "0", "")) diff --git a/docs/changelogs/v22.1.1.2542-prestable.md b/docs/changelogs/v22.1.1.2542-prestable.md new file mode 100644 index 00000000000..b552da5cfb8 --- /dev/null +++ b/docs/changelogs/v22.1.1.2542-prestable.md @@ -0,0 +1,217 @@ +### ClickHouse release v22.1.1.2542-prestable FIXME as compared to v21.12.1.9017-prestable + +#### Backward Incompatible Change +* Change ZooKeeper path for zero-copy marks for shared data. Fix for remove marks in ZooKeeper for renamed parts. [#32061](https://github.com/ClickHouse/ClickHouse/pull/32061) ([ianton-ru](https://github.com/ianton-ru)). +* - Account for scalar subqueries. 
With this change, rows read in scalar subqueries are now reported in the query_log. If the scalar subquery is cached (repeated or called for several rows) the rows read are only counted once. This change allows KILLing queries and reporting progress while they are executing scalar subqueries. [#32271](https://github.com/ClickHouse/ClickHouse/pull/32271) ([Raúl Marín](https://github.com/Algunenano)). +* Add `left`, `right`, `leftUTF8`, `rightUTF8` functions. Fix error in implementation of `substringUTF8` function with negative offset (offset from the end of string). The functions `left` and `right` were previously implemented in parser. Upgrade notes: distributed queries with `left` or `right` functions without aliases may throw exception if cluster contains different versions of clickhouse-server. If you are upgrading your cluster and encounter this error, you should finish upgrading your cluster to ensure all nodes have the same version. Also you can add aliases (`AS something`) to the columns in your queries to avoid this issue. [#33407](https://github.com/ClickHouse/ClickHouse/pull/33407) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Implemented sparse serialization. It can reduce usage of disk space and improve performance of some queries for columns, which contain a lot of default (zero) values. It can be enabled by setting `ratio_for_sparse_serialization`. Sparse serialization will be chosen dynamically for column, if it has ratio of number of default values to number of all values above that threshold. Serialization (default or sparse) will be fixed for every column in part, but may varies between parts. [#22535](https://github.com/ClickHouse/ClickHouse/pull/22535) ([Anton Popov](https://github.com/CurtizJ)). +* add grouping sets function, like GROUP BY grouping sets (a, b, (a, b)). [#26869](https://github.com/ClickHouse/ClickHouse/pull/26869) ([taylor12805](https://github.com/taylor12805)). 
+* Added an ability to read from all replicas within a shard during distributed query. To enable this, set `allow_experimental_parallel_reading_from_replicas=true` and `max_parallel_replicas` to any number. This closes [#26748](https://github.com/ClickHouse/ClickHouse/issues/26748). [#29279](https://github.com/ClickHouse/ClickHouse/pull/29279) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Start and stop servers when hosts and ports configuration changes. [#30549](https://github.com/ClickHouse/ClickHouse/pull/30549) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Create any kind of view with comment. ... [#31062](https://github.com/ClickHouse/ClickHouse/pull/31062) ([Vasily Nemkov](https://github.com/Enmk)). +* Implement hive table engine to access apache hive from clickhouse. Related RFC: [#29245](https://github.com/ClickHouse/ClickHouse/issues/29245). [#31104](https://github.com/ClickHouse/ClickHouse/pull/31104) ([李扬](https://github.com/taiyang-li)). +* * Automatic cluster discovery via Zoo/Keeper. [#31442](https://github.com/ClickHouse/ClickHouse/pull/31442) ([Vladimir C](https://github.com/vdimir)). +* Adding support for disks backed by Azure Blob Storage, in a similar way it has been done for disks backed by AWS S3. Current implementation allows for all the basic disk operations. [#31505](https://github.com/ClickHouse/ClickHouse/pull/31505) ([Jakub Kuklis](https://github.com/jkuklis)). +* * Add "TABLE OVERRIDE" feature for customizing MaterializedMySQL table schemas. [#32325](https://github.com/ClickHouse/ClickHouse/pull/32325) ([Stig Bakken](https://github.com/stigsb)). +* Implement data schema inference for input formats. Allow to skip structure (or write just `auto`) in table functions `file`, `url`, `s3`, `hdfs` and in parameters of `clickhouse-local` . Allow to skip structure in create query for table engines `File`, `HDFS`, `S3`, `URL`, `Merge`, `Buffer`, `Distributed` and `ReplicatedMergeTree` (if we add new replicas). 
[#32455](https://github.com/ClickHouse/ClickHouse/pull/32455) ([Kruglov Pavel](https://github.com/Avogar)). +* Support TABLE OVERRIDE clause for MaterializedPostgreSQL. RFC: [#31480](https://github.com/ClickHouse/ClickHouse/issues/31480). [#32749](https://github.com/ClickHouse/ClickHouse/pull/32749) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add `EXPLAIN TABLE OVERRIDE` query. [#32836](https://github.com/ClickHouse/ClickHouse/pull/32836) ([Stig Bakken](https://github.com/stigsb)). +* If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)). +* Add aggregate functions `cramersV`, `cramersVBiasCorrected`, `theilsU` and `contingency`. These functions calculate dependency (measure of association) between categorial values. All these functions are using cross-tab (histogram on pairs) for implementation. You can imagine it like a correlation coefficient but for any discrete values (not necessary numbers). [#33366](https://github.com/ClickHouse/ClickHouse/pull/33366) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added function `arrayLast`. Closes [#33390](https://github.com/ClickHouse/ClickHouse/issues/33390). [#33415](https://github.com/ClickHouse/ClickHouse/pull/33415) ([Maksim Kita](https://github.com/kitaisreal)). +* Add MONTHNAME function. [#33436](https://github.com/ClickHouse/ClickHouse/pull/33436) ([usurai](https://github.com/usurai)). +* Auto detect file extension. Close [#30918](https://github.com/ClickHouse/ClickHouse/issues/30918). [#33443](https://github.com/ClickHouse/ClickHouse/pull/33443) ([zhongyuankai](https://github.com/zhongyuankai)). +* Added function `arrayLastIndex`. 
[#33465](https://github.com/ClickHouse/ClickHouse/pull/33465) ([Maksim Kita](https://github.com/kitaisreal)). +* Add new h3 miscellaneous functions: `h3DegsToRads`, `h3RadsToDegs`, `h3HexAreaKm2`, `h3CellAreaM2`, `h3CellAreaRads2`. [#33479](https://github.com/ClickHouse/ClickHouse/pull/33479) ([Bharat Nallan](https://github.com/bharatnc)). +* Detect format by file extension in file/hdfs/s3/url table functions and HDFS/S3/URL table engines. [#33565](https://github.com/ClickHouse/ClickHouse/pull/33565) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Performance Improvement +* Non significant change. In extremely rare cases when data part is lost on every replica, after merging of some data parts, the subsequent queries may skip less amount of partitions during partition pruning. This hardly affects anything. [#32220](https://github.com/ClickHouse/ClickHouse/pull/32220) ([Azat Khuzhin](https://github.com/azat)). +* Slight performance improvement of `reinterpret` function. [#32587](https://github.com/ClickHouse/ClickHouse/pull/32587) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Reduce allocated memory for dictionaries with string attributes. [#33466](https://github.com/ClickHouse/ClickHouse/pull/33466) ([Maksim Kita](https://github.com/kitaisreal)). +* Avoid exponential backtracking in parser. This closes [#20158](https://github.com/ClickHouse/ClickHouse/issues/20158). [#33481](https://github.com/ClickHouse/ClickHouse/pull/33481) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Now date time conversion functions that generates time before 1970-01-01 00:00:00 will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). +* Added settings `command_read_timeout`, `command_write_timeout` for `StorageExecutable`, `StorageExecutablePool`, `ExecutableDictionary`, `ExecutablePoolDictionary`, `ExecutableUserDefinedFunctions`. 
Setting `command_read_timeout` controls timeout for reading data from command stdout in milliseconds. Setting `command_write_timeout` timeout for writing data to command stdin in milliseconds. Added settings `command_termination_timeout` for `ExecutableUserDefinedFunction`, `ExecutableDictionary`, `StorageExecutable`. Added setting `execute_direct` for `ExecutableUserDefinedFunction`, by default true. Added setting `execute_direct` for `ExecutableDictionary`, `ExecutablePoolDictionary`, by default false. [#30957](https://github.com/ClickHouse/ClickHouse/pull/30957) ([Maksim Kita](https://github.com/kitaisreal)). +* Optimize single part projection materialization. This closes [#31669](https://github.com/ClickHouse/ClickHouse/issues/31669). [#31885](https://github.com/ClickHouse/ClickHouse/pull/31885) ([Amos Bird](https://github.com/amosbird)). +* Enable comparison between `Decimal` and `Float`. Closes [#22626](https://github.com/ClickHouse/ClickHouse/issues/22626). [#31966](https://github.com/ClickHouse/ClickHouse/pull/31966) ([flynn](https://github.com/ucasfl)). +* - Ignore parse failure of opentelemetry's `traceparent` header. [#32116](https://github.com/ClickHouse/ClickHouse/pull/32116) ([Frank Chen](https://github.com/FrankChen021)). +* Improve keeper writing performance by optimization the size calculation logic. [#32366](https://github.com/ClickHouse/ClickHouse/pull/32366) ([zhanglistar](https://github.com/zhanglistar)). +* Allows to connect to mongodb 5.0. Closes [#31483](https://github.com/ClickHouse/ClickHouse/issues/31483),. [#32416](https://github.com/ClickHouse/ClickHouse/pull/32416) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable some optimizations for window functions. Closes [#31535](https://github.com/ClickHouse/ClickHouse/issues/31535). Closes [#31620](https://github.com/ClickHouse/ClickHouse/issues/31620). [#32453](https://github.com/ClickHouse/ClickHouse/pull/32453) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Allow empty credentials for mongo engine. Closes [#26267](https://github.com/ClickHouse/ClickHouse/issues/26267). [#32460](https://github.com/ClickHouse/ClickHouse/pull/32460) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Improve handling nested structures with missing columns while reading protobuf. Follow-up to https://github.com/ClickHouse/ClickHouse/pull/31988. [#32531](https://github.com/ClickHouse/ClickHouse/pull/32531) ([Vitaly Baranov](https://github.com/vitlibar)). +* Events clause support for window view watch query. [#32607](https://github.com/ClickHouse/ClickHouse/pull/32607) ([vxider](https://github.com/Vxider)). +* Add settings `max_concurrent_select_queries` and `max_concurrent_insert_queries` to control concurrent queries by query kind. Close [#3575](https://github.com/ClickHouse/ClickHouse/issues/3575). [#32609](https://github.com/ClickHouse/ClickHouse/pull/32609) ([SuperDJY](https://github.com/cmsxbc)). +* support Date32 for `generateRandom` engine. [#32643](https://github.com/ClickHouse/ClickHouse/pull/32643) ([nauta](https://github.com/nautaa)). +* Support authSource option for storage MongoDB. Closes [#32594](https://github.com/ClickHouse/ClickHouse/issues/32594). [#32702](https://github.com/ClickHouse/ClickHouse/pull/32702) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to control connection timeouts for mysql (previously was supported only for dictionary source). Closes [#16669](https://github.com/ClickHouse/ClickHouse/issues/16669). Previously default connect_timeout was rather small, now it is configurable. [#32734](https://github.com/ClickHouse/ClickHouse/pull/32734) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Flush all In-Memory data parts when WAL is not enabled while shutdown server or detaching table. [#32742](https://github.com/ClickHouse/ClickHouse/pull/32742) ([nauta](https://github.com/nautaa)). 
+* Improve gRPC compression support for [#28671](https://github.com/ClickHouse/ClickHouse/issues/28671). [#32747](https://github.com/ClickHouse/ClickHouse/pull/32747) ([Vitaly Baranov](https://github.com/vitlibar)). +* Added support for specifying subquery as SQL user defined function. Example: `CREATE FUNCTION test AS () -> (SELECT 1)`. Closes [#30755](https://github.com/ClickHouse/ClickHouse/issues/30755). [#32758](https://github.com/ClickHouse/ClickHouse/pull/32758) ([Maksim Kita](https://github.com/kitaisreal)). +* Support hints for clickhouse-client and clickhouse-local. Closes [#32237](https://github.com/ClickHouse/ClickHouse/issues/32237),. [#32841](https://github.com/ClickHouse/ClickHouse/pull/32841) ([凌涛](https://github.com/lingtaolf)). +* - Do not prepend THREADS_COUNT with -j to avoid additional prepending in subprocesses. [#32844](https://github.com/ClickHouse/ClickHouse/pull/32844) ([Ernest Zaslavsky](https://github.com/kreuzerkrieg)). +* Added support for `BIT` data type in `MaterializedMySQL`. Closes [#15182](https://github.com/ClickHouse/ClickHouse/issues/15182), [#32233](https://github.com/ClickHouse/ClickHouse/issues/32233). [#32900](https://github.com/ClickHouse/ClickHouse/pull/32900) ([zzsmdfj](https://github.com/zzsmdfj)). +* More efficient handling of globs for url storage. Closes [#32866](https://github.com/ClickHouse/ClickHouse/issues/32866). [#32907](https://github.com/ClickHouse/ClickHouse/pull/32907) ([Kseniia Sumarokova](https://github.com/kssenii)). +* This only happens in unofficial builds. Fixed segfault when inserting data into compressed Decimal, String, FixedString and Array columns. This closes [#32939](https://github.com/ClickHouse/ClickHouse/issues/32939). [#32940](https://github.com/ClickHouse/ClickHouse/pull/32940) ([N. Kolotov](https://github.com/nkolotov)). +* Dictionaries added `Date32` date type support. Closes [#32913](https://github.com/ClickHouse/ClickHouse/issues/32913). 
[#32971](https://github.com/ClickHouse/ClickHouse/pull/32971) ([Maksim Kita](https://github.com/kitaisreal)). +* Short circuit evaluation function `throwIf` support. Closes [#32969](https://github.com/ClickHouse/ClickHouse/issues/32969). [#32973](https://github.com/ClickHouse/ClickHouse/pull/32973) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve Bool type serialization and deserialization. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)). +* Send profile info in clickhouse-local. Closes [#33093](https://github.com/ClickHouse/ClickHouse/issues/33093). [#33097](https://github.com/ClickHouse/ClickHouse/pull/33097) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Inject git information into clickhouse binary file. So we can get source code revision easily from clickhouse binary file. [#33124](https://github.com/ClickHouse/ClickHouse/pull/33124) ([李扬](https://github.com/taiyang-li)). +* Validate config keys for external dictionaries. [#33095](https://github.com/ClickHouse/ClickHouse/issues/33095)#issuecomment-1000577517. [#33130](https://github.com/ClickHouse/ClickHouse/pull/33130) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add retry for Postgres connect in case nothing has been fetched yet. Closes [#33199](https://github.com/ClickHouse/ClickHouse/issues/33199). [#33209](https://github.com/ClickHouse/ClickHouse/pull/33209) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly prevent nullable primary keys if necessary. This is for [#32780](https://github.com/ClickHouse/ClickHouse/issues/32780). [#33218](https://github.com/ClickHouse/ClickHouse/pull/33218) ([Amos Bird](https://github.com/amosbird)). +* If storage supports SETTINGS allow to pass them as key value or via config. Add this support for mysql. [#33231](https://github.com/ClickHouse/ClickHouse/pull/33231) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Allow to cancel formats Arrow / Parquet / ORC which failed to be cancelled in case of big files and setting input_format_allow_seeks as false. Closes [#29678](https://github.com/ClickHouse/ClickHouse/issues/29678). [#33238](https://github.com/ClickHouse/ClickHouse/pull/33238) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* parseDateTimeBestEffort support Unix Timestamp with Milliseconds. [#33276](https://github.com/ClickHouse/ClickHouse/pull/33276) ([Ben](https://github.com/benbiti)). +* Support moving conditions to `PREWHERE` (setting `optimize_move_to_prewhere`) for tables of `Merge` engine if all its underlying tables support `PREWHERE`. [#33300](https://github.com/ClickHouse/ClickHouse/pull/33300) ([Anton Popov](https://github.com/CurtizJ)). +* Pressing Ctrl+C twice will terminate `clickhouse-benchmark` immediately without waiting for in-flight queries. This closes [#32586](https://github.com/ClickHouse/ClickHouse/issues/32586). [#33303](https://github.com/ClickHouse/ClickHouse/pull/33303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* support `` in cluster configuration. Close [#33270](https://github.com/ClickHouse/ClickHouse/issues/33270). [#33330](https://github.com/ClickHouse/ClickHouse/pull/33330) ([SuperDJY](https://github.com/cmsxbc)). +* `LineAsString` can be used as output format. This closes [#30919](https://github.com/ClickHouse/ClickHouse/issues/30919). [#33331](https://github.com/ClickHouse/ClickHouse/pull/33331) ([Sergei Trifonov](https://github.com/serxa)). +* Allow negative intervals in function `intervalLengthSum`. Their length will be added as well. This closes [#33323](https://github.com/ClickHouse/ClickHouse/issues/33323).
[#33335](https://github.com/ClickHouse/ClickHouse/pull/33335) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* clickhouse-local: track memory under --max_memory_usage_in_client option. [#33341](https://github.com/ClickHouse/ClickHouse/pull/33341) ([Azat Khuzhin](https://github.com/azat)). +* Make installation script working on FreeBSD. This closes [#33384](https://github.com/ClickHouse/ClickHouse/issues/33384). [#33418](https://github.com/ClickHouse/ClickHouse/pull/33418) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add some building options in system.build_options. [#33431](https://github.com/ClickHouse/ClickHouse/pull/33431) ([李扬](https://github.com/taiyang-li)). +* Abuse of `untuple` function was leading to exponential complexity of query analysis (found by fuzzer). This closes [#33297](https://github.com/ClickHouse/ClickHouse/issues/33297). [#33445](https://github.com/ClickHouse/ClickHouse/pull/33445) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add config to enable ipv4 or ipv6. This close [#33381](https://github.com/ClickHouse/ClickHouse/issues/33381). [#33450](https://github.com/ClickHouse/ClickHouse/pull/33450) ([Wu Xueyang](https://github.com/wuxueyang96)). +* add function decodeURLFormComponent. Close [#10298](https://github.com/ClickHouse/ClickHouse/issues/10298). [#33451](https://github.com/ClickHouse/ClickHouse/pull/33451) ([SuperDJY](https://github.com/cmsxbc)). +* Implement Materialized view `getVirtuals` function. Close [#11210](https://github.com/ClickHouse/ClickHouse/issues/11210). [#33482](https://github.com/ClickHouse/ClickHouse/pull/33482) ([zhongyuankai](https://github.com/zhongyuankai)). + +#### Bug Fix +* Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). 
+* - Clickhouse Keeper handler should remove operation when response sent. [#32988](https://github.com/ClickHouse/ClickHouse/pull/32988) ([JackyWoo](https://github.com/JackyWoo)). +* Fix null pointer dereference in low cardinality data when deserializing LowCardinality data in the Native format. [#33021](https://github.com/ClickHouse/ClickHouse/pull/33021) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Specifically crafted input data for `Native` format may lead to reading uninitialized memory or crash. This is relevant if `clickhouse-server` is open for write access to adversary. [#33050](https://github.com/ClickHouse/ClickHouse/pull/33050) ([Heena Bansal](https://github.com/HeenaBansal2009)). + +#### Build/Testing/Packaging Improvement +* - Add arm64 packages - Stream python logs in realtime with `PYTHONUNBUFFERED=1` - Fix building docker images in docker/packager/packager script. [#32415](https://github.com/ClickHouse/ClickHouse/pull/32415) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Terminate build when linker path not found. [#32437](https://github.com/ClickHouse/ClickHouse/pull/32437) ([JackyWoo](https://github.com/JackyWoo)). +* - Create a global ENV per job - Clean CCACHE after a build is over. [#32478](https://github.com/ClickHouse/ClickHouse/pull/32478) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* PENDING (Should mention submodule updates and versions). [#32484](https://github.com/ClickHouse/ClickHouse/pull/32484) ([Raúl Marín](https://github.com/Algunenano)). +* Remove readline support. [#32574](https://github.com/ClickHouse/ClickHouse/pull/32574) ([Azat Khuzhin](https://github.com/azat)). +* Fix build issue related to azure blob storage. [#32788](https://github.com/ClickHouse/ClickHouse/pull/32788) ([Amos Bird](https://github.com/amosbird)). +* - Unify init scripts for every worker runner type - Install pigz in AMI. [#32800](https://github.com/ClickHouse/ClickHouse/pull/32800) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* Use all available in container processors for PVS studio check and fast tests. Delete coverage image. [#32854](https://github.com/ClickHouse/ClickHouse/pull/32854) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Rename main to pull_request - Add BuilderDebAarch64 to all workflows - Use docker buildkit, save building cache to docker hub and reuse it - Build x86_64 and arm64 docker images separately, then merge them together to a multi-architecture manifest - Tune many docker images to being multi-architecture - Use the images from the current PR/commit in the following dependent builds - Upgrade mysql client in stateless-tests image - Add functional tests for aarch64 for PR actions (forced green for a while) - Add python typing to some scripts - Add docker buildkit to runners' init script - Add func-tester-aarch64 runners - Use `docker login --password-stdin` to not expose password on exception. [#32911](https://github.com/ClickHouse/ClickHouse/pull/32911) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Enable hermetic build for shared builds. This is mainly for developers. [#32968](https://github.com/ClickHouse/ClickHouse/pull/32968) ([Amos Bird](https://github.com/amosbird)). +* Prepare ClickHouse to be built with musl-libc. It is not enabled by default. [#33134](https://github.com/ClickHouse/ClickHouse/pull/33134) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a simple tool to visualize flaky tests in web browser. [#33185](https://github.com/ClickHouse/ClickHouse/pull/33185) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid strict checking when `ENABLE_AZURE_BLOB_STORAGE = 0`. https://github.com/ClickHouse/ClickHouse/pull/32948#discussion_r773168611 cc @nikitamikhaylov. [#33219](https://github.com/ClickHouse/ClickHouse/pull/33219) ([Amos Bird](https://github.com/amosbird)). +* Add more tests for the nullable primary key feature. 
Add more tests with different types and merge tree kinds, plus randomly generated data. [#33228](https://github.com/ClickHouse/ClickHouse/pull/33228) ([Amos Bird](https://github.com/amosbird)). +* Don't use particular encoding for diff-strings, it may contain multiple different encodings. [#33336](https://github.com/ClickHouse/ClickHouse/pull/33336) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Avoid strict checking when ENABLE_AZURE_BLOB_STORAGE = 0. This is another try on behalf of https://github.com/ClickHouse/ClickHouse/pull/33219 , which was reverted likely due to CI issues. [#33346](https://github.com/ClickHouse/ClickHouse/pull/33346) ([Amos Bird](https://github.com/amosbird)). +* During migration from Yandex to github actions we've lost static links to the latest master ([doc](https://clickhouse.com/docs/en/getting-started/install/#from-single-binary)) It solves issue [#33480](https://github.com/ClickHouse/ClickHouse/issues/33480) partially. [#33559](https://github.com/ClickHouse/ClickHouse/pull/33559) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Restore a lost description checking. [#33591](https://github.com/ClickHouse/ClickHouse/pull/33591) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add `actionlint` for workflows and verify workflow files via `act --list` to check the correct workflow syntax. [#33612](https://github.com/ClickHouse/ClickHouse/pull/33612) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove editing /etc/hosts from Dockerfile. [#33635](https://github.com/ClickHouse/ClickHouse/pull/33635) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Properly separate thrift-cmake from arrow-cmake after https://github.com/ClickHouse/ClickHouse/pull/31104 . cc @taiyang-li. [#33661](https://github.com/ClickHouse/ClickHouse/pull/33661) ([Amos Bird](https://github.com/amosbird)). 
+ +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fixed CAST from String to IPv4 or IPv6 and back. Fixed error message in case of failed conversion. [#29224](https://github.com/ClickHouse/ClickHouse/pull/29224) ([Dmitry Novik](https://github.com/novikd)). +* Fix base64Encode adding trailing bytes on small strings. [#31797](https://github.com/ClickHouse/ClickHouse/pull/31797) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)). +* Fix 'APPLY lambda' parsing which could lead to client/server crash. [#32138](https://github.com/ClickHouse/ClickHouse/pull/32138) ([Kruglov Pavel](https://github.com/Avogar)). +* Some replication queue entries might hang for `temporary_directories_lifetime` (1 day by default) with `Directory tmp_merge_` or `Part ... (state Deleting) already exists, but it will be deleted soon` or similar error. It's fixed. Fixes [#29616](https://github.com/ClickHouse/ClickHouse/issues/29616). [#32201](https://github.com/ClickHouse/ClickHouse/pull/32201) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix unix timestamp Millisecond convert to DateTime64, fractional part calc reversed. [#32240](https://github.com/ClickHouse/ClickHouse/pull/32240) ([Ben](https://github.com/benbiti)). +* Fix broken select query when there are more than 2 row policies on same column, begin at second queries on the same session. [#31606](https://github.com/ClickHouse/ClickHouse/issues/31606). [#32291](https://github.com/ClickHouse/ClickHouse/pull/32291) ([SuperDJY](https://github.com/cmsxbc)). +* Fix failures in queries that are trying to use skipping indices, which are not materialized yet. Fixes [#32292](https://github.com/ClickHouse/ClickHouse/issues/32292) and [#30343](https://github.com/ClickHouse/ClickHouse/issues/30343). 
[#32359](https://github.com/ClickHouse/ClickHouse/pull/32359) ([Anton Popov](https://github.com/CurtizJ)). +* Fix issue with ambiguous query formatting in distributed queries that led to errors when some table columns were named ALL or DISTINCT. This closes [#32391](https://github.com/ClickHouse/ClickHouse/issues/32391). [#32490](https://github.com/ClickHouse/ClickHouse/pull/32490) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Properly handle the case of Apache Arrow column duplication. [#32507](https://github.com/ClickHouse/ClickHouse/pull/32507) ([Dmitriy Mokhnatkin](https://github.com/DMokhnatkin)). +* Fix crash in `JoinCommon::removeColumnNullability`, close [#32458](https://github.com/ClickHouse/ClickHouse/issues/32458). [#32508](https://github.com/ClickHouse/ClickHouse/pull/32508) ([Vladimir C](https://github.com/vdimir)). +* Fix groupBitmapAnd function on distributed table. [#32529](https://github.com/ClickHouse/ClickHouse/pull/32529) ([minhthucdao](https://github.com/dmthuc)). +* Fix async inserts with formats CustomSeparated, Template, Regexp, MsgPack and JSONAsString. Previously async inserts with these formats didn't read any data. [#32530](https://github.com/ClickHouse/ClickHouse/pull/32530) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix sparse_hashed dict performance with sequential keys (wrong hash function). [#32536](https://github.com/ClickHouse/ClickHouse/pull/32536) ([Azat Khuzhin](https://github.com/azat)). +* Fix table lifetime (i.e. possible use-after-free) in case of parallel DROP TABLE and INSERT. [#32572](https://github.com/ClickHouse/ClickHouse/pull/32572) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible exception at RabbitMQ storage startup by delaying channel creation. [#32584](https://github.com/ClickHouse/ClickHouse/pull/32584) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix surprisingly bad code in function `file`.
[#32640](https://github.com/ClickHouse/ClickHouse/pull/32640) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* close [#32504](https://github.com/ClickHouse/ClickHouse/issues/32504). [#32649](https://github.com/ClickHouse/ClickHouse/pull/32649) ([Vladimir C](https://github.com/vdimir)). +* Fix LOGICAL_ERROR when the target of a materialized view is a JOIN or a SET table. [#32669](https://github.com/ClickHouse/ClickHouse/pull/32669) ([Raúl Marín](https://github.com/Algunenano)). +* Fix `optimize_read_in_order` optimization in case when table engine is `Distributed` or `Merge` and its underlying `MergeTree` tables have monotonous function in prefix of sorting key. [#32670](https://github.com/ClickHouse/ClickHouse/pull/32670) ([Anton Popov](https://github.com/CurtizJ)). +* Fix `ALTER TABLE ... MATERIALIZE TTL` query with `TTL ... DELETE WHERE ...` and `TTL ... GROUP BY ...` modes. [#32695](https://github.com/ClickHouse/ClickHouse/pull/32695) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error `Column is not under aggregate function` in case of MV with `GROUP BY (list of columns)` (which is parsed as `GROUP BY tuple(...)`) over Kafka/RabbitMQ. Fixes [#32668](https://github.com/ClickHouse/ClickHouse/issues/32668) and [#32744](https://github.com/ClickHouse/ClickHouse/issues/32744). [#32751](https://github.com/ClickHouse/ClickHouse/pull/32751) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix crash when fuzzBits is used with multiple identical FixedString values. Closes [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)). +* Fix `--database` option for clickhouse-local. [#32797](https://github.com/ClickHouse/ClickHouse/pull/32797) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed.
Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Avoid reusing the scalar subquery cache when processing MV blocks. This fixes a bug when the scalar query reference the source table but it means that all subscalar queries in the MV definition will be calculated for each block. [#32811](https://github.com/ClickHouse/ClickHouse/pull/32811) ([Raúl Marín](https://github.com/Algunenano)). +* `MergeTree` table engine might silently skip some mutations if there are too many running mutations or in case of high memory consumption, it's fixed. Fixes [#17882](https://github.com/ClickHouse/ClickHouse/issues/17882). [#32814](https://github.com/ClickHouse/ClickHouse/pull/32814) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix optimization with lazy seek for async reads from remote fs. Closes [#32803](https://github.com/ClickHouse/ClickHouse/issues/32803). [#32835](https://github.com/ClickHouse/ClickHouse/pull/32835) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed --echo option is not used by clickhouse-client in batch mode with single query. [#32843](https://github.com/ClickHouse/ClickHouse/pull/32843) ([N. Kolotov](https://github.com/nkolotov)). +* Fix MV query with multiple chunk result. Fixes [#31419](https://github.com/ClickHouse/ClickHouse/issues/31419). [#32862](https://github.com/ClickHouse/ClickHouse/pull/32862) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Close [#32487](https://github.com/ClickHouse/ClickHouse/issues/32487). [#32914](https://github.com/ClickHouse/ClickHouse/pull/32914) ([Vladimir C](https://github.com/vdimir)). +* Fix ORC stripe reading. [#32929](https://github.com/ClickHouse/ClickHouse/pull/32929) ([Ernest Zaslavsky](https://github.com/kreuzerkrieg)). +* Fix a regression in `replaceRegexpAll` function. The function worked incorrectly when matched substring was empty. 
This closes [#32777](https://github.com/ClickHouse/ClickHouse/issues/32777). This closes [#30245](https://github.com/ClickHouse/ClickHouse/issues/30245). [#32945](https://github.com/ClickHouse/ClickHouse/pull/32945) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix UB in case of unexpected EOF during filling a set from HTTP query (i.e. if the client interrupted in the middle, i.e. `timeout 0.15s curl -Ss -F 's=@t.csv;' 'http://127.0.0.1:8123/?s_structure=key+Int&query=SELECT+dummy+IN+s'` and with large enough `t.csv`). [#32955](https://github.com/ClickHouse/ClickHouse/pull/32955) ([Azat Khuzhin](https://github.com/azat)). +* Fix throwing exception like positional argument out of bounds for non-positional arguments. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173)#event-5789668239. [#32961](https://github.com/ClickHouse/ClickHouse/pull/32961) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix hdfs url check that didn't allow using HA namenode address. Bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix wrong tuple output in CSV format in case of custom csv delimiter. [#32981](https://github.com/ClickHouse/ClickHouse/pull/32981) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix Context leak in case of cancel_http_readonly_queries_on_client_close (i.e. leaking of external tables that had been uploaded to the server and other resources). [#32982](https://github.com/ClickHouse/ClickHouse/pull/32982) ([Azat Khuzhin](https://github.com/azat)). +* Remove obsolete code from ConfigProcessor. Yandex specific code is not used anymore. The code contained one minor defect. This defect was reported by [Mallik Hassan](https://github.com/SadiHassan) in [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). This closes [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032).
[#33026](https://github.com/ClickHouse/ClickHouse/pull/33026) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix s3 table function reading empty file. Closes [#33008](https://github.com/ClickHouse/ClickHouse/issues/33008). [#33037](https://github.com/ClickHouse/ClickHouse/pull/33037) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix error `Invalid version for SerializationLowCardinality key column` in case of reading from `LowCardinality` column with `local_filesystem_read_prefetch` or `remote_filesystem_read_prefetch` enabled. [#33046](https://github.com/ClickHouse/ClickHouse/pull/33046) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Don't allow to write into S3 if path contains globs. [#33142](https://github.com/ClickHouse/ClickHouse/pull/33142) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix incorrect metric: StorageBufferBytes. [#33159](https://github.com/ClickHouse/ClickHouse/pull/33159) ([xuyatian](https://github.com/xuyatian)). +* Fix MaterializedPostgreSQL detach/attach (removing / adding to replication) tables with non-default schema. Found in [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33179](https://github.com/ClickHouse/ClickHouse/pull/33179) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Make sure unused replication slots are always removed. Found in [#26952](https://github.com/ClickHouse/ClickHouse/issues/26952). [#33187](https://github.com/ClickHouse/ClickHouse/pull/33187) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix ddl validation. Fix setting `materialized_postgresql_allow_automatic_update`. Closes [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33200](https://github.com/ClickHouse/ClickHouse/pull/33200) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix WHERE 1=0 for external databases query. Closes [#33152](https://github.com/ClickHouse/ClickHouse/issues/33152).
[#33214](https://github.com/ClickHouse/ClickHouse/pull/33214) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add asynchronous inserts (with enabled setting `async_insert`) to query log. Previously such queries didn't appear in query log. [#33239](https://github.com/ClickHouse/ClickHouse/pull/33239) ([Anton Popov](https://github.com/CurtizJ)). +* Fix ACLMap num, because acl_to_num will erase. [#33246](https://github.com/ClickHouse/ClickHouse/pull/33246) ([小路](https://github.com/nicelulu)). +* Fix ACL with explicit digit hash in clickhouse-keeper: now the behavior is consistent with zookeeper and generated digest is always accepted. [#33249](https://github.com/ClickHouse/ClickHouse/pull/33249) ([小路](https://github.com/nicelulu)). +* Fix when `COMMENT` for dictionaries does not appear in `system.tables`, `system.dictionaries`. Allow to modify comment for `Dictionary` engine. Closes [#33251](https://github.com/ClickHouse/ClickHouse/issues/33251). [#33261](https://github.com/ClickHouse/ClickHouse/pull/33261) ([Maksim Kita](https://github.com/kitaisreal)). +* The commands `SYSTEM SUSPEND` and `SYSTEM ... THREAD FUZZER` missed access control. It is fixed. Author: Kevin Michel. [#33333](https://github.com/ClickHouse/ClickHouse/pull/33333) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not try to read past EOF (to workaround a bug in a kernel), this bug can be reproduced on kernels (3.14..5.9), and requires `index_granularity_bytes=0` (i.e. turn off adaptive index granularity). [#33372](https://github.com/ClickHouse/ClickHouse/pull/33372) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible use-after-free for INSERT into MV with concurrent DROP ([#32572](https://github.com/ClickHouse/ClickHouse/issues/32572) significantly reduce the race window, this one should completely eliminate it). [#33386](https://github.com/ClickHouse/ClickHouse/pull/33386) ([Azat Khuzhin](https://github.com/azat)).
+* Fix query cancellation in case of allow_experimental_parallel_reading_from_replicas. [#33456](https://github.com/ClickHouse/ClickHouse/pull/33456) ([Azat Khuzhin](https://github.com/azat)). +* Fix DROP MaterializedPostgreSQL database. [#33468](https://github.com/ClickHouse/ClickHouse/pull/33468) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix dictionary expressions for RangeHashedDictionary range min and range max attributes when created using DDL. Closes [#30809](https://github.com/ClickHouse/ClickHouse/issues/30809). [#33478](https://github.com/ClickHouse/ClickHouse/pull/33478) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix an exception `Block structure mismatch` which may happen during insertion into table with default nested `LowCardinality` column. Fixes [#33028](https://github.com/ClickHouse/ClickHouse/issues/33028). [#33504](https://github.com/ClickHouse/ClickHouse/pull/33504) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Close issue: [#33289](https://github.com/ClickHouse/ClickHouse/issues/33289) Fix bug when query view with setting offset and limit. [#33518](https://github.com/ClickHouse/ClickHouse/pull/33518) ([hexiaoting](https://github.com/hexiaoting)). +* Fix parsing incorrect queries with FROM INFILE statement. [#33521](https://github.com/ClickHouse/ClickHouse/pull/33521) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix segfault in arrowSchemaToCHHeader if schema contains Dictionary type. Closes [#33507](https://github.com/ClickHouse/ClickHouse/issues/33507). [#33529](https://github.com/ClickHouse/ClickHouse/pull/33529) ([Kruglov Pavel](https://github.com/Avogar)). +* session_id_counter pointer to next slot. [#33555](https://github.com/ClickHouse/ClickHouse/pull/33555) ([小路](https://github.com/nicelulu)). +* Fix segfault in Avro that appears after the second insert into file. [#33566](https://github.com/ClickHouse/ClickHouse/pull/33566) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix wrong database for JOIN w/o explicit database in distributed queries (Fixes: [#10471](https://github.com/ClickHouse/ClickHouse/issues/10471)). [#33611](https://github.com/ClickHouse/ClickHouse/pull/33611) ([Azat Khuzhin](https://github.com/azat)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Update CHANGELOG.md'. [#32472](https://github.com/ClickHouse/ClickHouse/pull/32472) ([Rich Raposa](https://github.com/rfraposa)). +* NO CL ENTRY: 'Revert "Split long tests into multiple checks"'. [#32514](https://github.com/ClickHouse/ClickHouse/pull/32514) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Revert "Split long tests into multiple checks""'. [#32515](https://github.com/ClickHouse/ClickHouse/pull/32515) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'blog post how to enable predictive capabilities in Clickhouse'. [#32768](https://github.com/ClickHouse/ClickHouse/pull/32768) ([Tom Risse](https://github.com/flickerbox-tom)). +* NO CL ENTRY: 'Revert "Fix build issue related to azure blob storage"'. [#32845](https://github.com/ClickHouse/ClickHouse/pull/32845) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Dictionaries added Date32 type support"'. [#33053](https://github.com/ClickHouse/ClickHouse/pull/33053) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Updated Lawrence Berkeley National Lab stats'. [#33066](https://github.com/ClickHouse/ClickHouse/pull/33066) ([Michael Smitasin](https://github.com/michaelsmitasin)). +* NO CL ENTRY: 'fix AggregateFunctionGroupBitmapData function rb_contains rb_remove'. [#33127](https://github.com/ClickHouse/ClickHouse/pull/33127) ([DR](https://github.com/freedomDR)). +* NO CL ENTRY: 'Fix for example request with settings'. [#33143](https://github.com/ClickHouse/ClickHouse/pull/33143) ([Vitaly Artemyev](https://github.com/VitalyArt)). +* NO CL ENTRY: 'Revert "Grouping sets dev"'. 
[#33186](https://github.com/ClickHouse/ClickHouse/pull/33186) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Optimize MergeTreePartsMover'. [#33225](https://github.com/ClickHouse/ClickHouse/pull/33225) ([zhongyuankai](https://github.com/zhongyuankai)). +* NO CL ENTRY: 'rm redundant judge in hashmap iter operation'. [#33285](https://github.com/ClickHouse/ClickHouse/pull/33285) ([zbtzbtzbt](https://github.com/zbtzbtzbt)). +* NO CL ENTRY: 'fix hang up with command 'drop table system.query_log sync''. [#33293](https://github.com/ClickHouse/ClickHouse/pull/33293) ([zhanghuajie](https://github.com/zhanghuajieHIT)). +* NO CL ENTRY: 'Improve query performance of system tables'. [#33312](https://github.com/ClickHouse/ClickHouse/pull/33312) ([zhongyuankai](https://github.com/zhongyuankai)). +* NO CL ENTRY: 'Revert "Better cmake script for azure blob"'. [#33319](https://github.com/ClickHouse/ClickHouse/pull/33319) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* NO CL ENTRY: 'Added Superwall to adopters list'. [#33573](https://github.com/ClickHouse/ClickHouse/pull/33573) ([Justin Hilliard](https://github.com/jahilliard)). +* NO CL ENTRY: 'Revert "Ignore parse failure of opentelemetry header"'. [#33594](https://github.com/ClickHouse/ClickHouse/pull/33594) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release): + +* Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NO CL CATEGORY + +* Fix Regular Expression while key path search. [#33023](https://github.com/ClickHouse/ClickHouse/pull/33023) ([mreddy017](https://github.com/mreddy017)). 
+* - Allow to split GraphiteMergeTree rollup rules for plain/tagged metrics (optional rule_type field). [#33494](https://github.com/ClickHouse/ClickHouse/pull/33494) ([Michail Safronov](https://github.com/msaf1980)). + +#### Bug Fix (v21.9.4.35-stable) + +* Fix [#32964](https://github.com/ClickHouse/ClickHouse/issues/32964). [#32965](https://github.com/ClickHouse/ClickHouse/pull/32965) ([save-my-heart](https://github.com/save-my-heart)). + +#### New Feature / New Tool + +* Tool for collecting diagnostics data. [#33175](https://github.com/ClickHouse/ClickHouse/pull/33175) ([Alexander Burmak](https://github.com/Alex-Burmak)). + diff --git a/docs/changelogs/v22.1.2.2-stable.md b/docs/changelogs/v22.1.2.2-stable.md new file mode 100644 index 00000000000..450c640bc5e --- /dev/null +++ b/docs/changelogs/v22.1.2.2-stable.md @@ -0,0 +1,2 @@ +### ClickHouse release v22.1.2.2-stable FIXME as compared to v22.1.1.2542-prestable + diff --git a/docs/changelogs/v22.1.3.7-stable.md b/docs/changelogs/v22.1.3.7-stable.md new file mode 100644 index 00000000000..ffb0ec6048d --- /dev/null +++ b/docs/changelogs/v22.1.3.7-stable.md @@ -0,0 +1,2 @@ +### ClickHouse release v22.1.3.7-stable FIXME as compared to v22.1.2.2-stable + diff --git a/docs/changelogs/v22.1.4.30-stable.md b/docs/changelogs/v22.1.4.30-stable.md new file mode 100644 index 00000000000..1ea56131481 --- /dev/null +++ b/docs/changelogs/v22.1.4.30-stable.md @@ -0,0 +1,19 @@ +### ClickHouse release v22.1.4.30-stable FIXME as compared to v22.1.3.7-stable + +#### Build/Testing/Packaging Improvement +* Backport CI checks to 22.1 release branch. [#34897](https://github.com/ClickHouse/ClickHouse/pull/34897) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#34119](https://github.com/ClickHouse/ClickHouse/issues/34119): Fix usage of functions `array` and `tuple` with literal arguments in distributed queries. 
Previously it could lead to `Not found columns` exception. [#33938](https://github.com/ClickHouse/ClickHouse/pull/33938) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#34124](https://github.com/ClickHouse/ClickHouse/issues/34124): Fix crash while reading of nested tuples. Fixes [#33838](https://github.com/ClickHouse/ClickHouse/issues/33838). [#33956](https://github.com/ClickHouse/ClickHouse/pull/33956) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#34098](https://github.com/ClickHouse/ClickHouse/issues/34098): Fix segfault while parsing ORC file with corrupted footer. Closes [#33797](https://github.com/ClickHouse/ClickHouse/issues/33797). [#33984](https://github.com/ClickHouse/ClickHouse/pull/33984) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#34038](https://github.com/ClickHouse/ClickHouse/issues/34038): Fix bug which led to inability for server to start when both replicated access storage and keeper are used. Introduced two settings for keeper socket timeout instead of settings from default user: `keeper_server.socket_receive_timeout_sec` and `keeper_server.socket_send_timeout_sec`. Fixes [#33973](https://github.com/ClickHouse/ClickHouse/issues/33973). [#33988](https://github.com/ClickHouse/ClickHouse/pull/33988) ([alesapin](https://github.com/alesapin)). +* Backported in [#34184](https://github.com/ClickHouse/ClickHouse/issues/34184): Fixed minor race condition that might cause "intersecting parts" error in extremely rare cases after ZooKeeper connection loss. [#34096](https://github.com/ClickHouse/ClickHouse/pull/34096) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#34467](https://github.com/ClickHouse/ClickHouse/issues/34467): Fix inserts to distributed tables in case of change of native protocol. The last change was in the version 22.1, so there may be some failures of inserts to distributed tables after upgrade to that version.
[#34132](https://github.com/ClickHouse/ClickHouse/pull/34132) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#34209](https://github.com/ClickHouse/ClickHouse/issues/34209): Fix bug which can rarely lead to error "Cannot read all data" while reading LowCardinality columns of MergeTree table engines family which stores data on remote file system like S3. [#34139](https://github.com/ClickHouse/ClickHouse/pull/34139) ([alesapin](https://github.com/alesapin)). +* Backported in [#34266](https://github.com/ClickHouse/ClickHouse/issues/34266): Fix metric `Query`, which shows number of executing queries. In last several releases it was always 0. [#34224](https://github.com/ClickHouse/ClickHouse/pull/34224) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#34298](https://github.com/ClickHouse/ClickHouse/issues/34298): Fix progress bar width. It was incorrectly rounded to integer number of characters. [#34275](https://github.com/ClickHouse/ClickHouse/pull/34275) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#34391](https://github.com/ClickHouse/ClickHouse/issues/34391): Try to fix rare bug while reading of empty arrays, which could lead to `Data compressed with different methods` error. [#34327](https://github.com/ClickHouse/ClickHouse/pull/34327) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#34430](https://github.com/ClickHouse/ClickHouse/issues/34430): Fix segfault in schema inference from url. Closes [#34147](https://github.com/ClickHouse/ClickHouse/issues/34147). [#34405](https://github.com/ClickHouse/ClickHouse/pull/34405) ([Kruglov Pavel](https://github.com/Avogar)). 
+ diff --git a/docs/changelogs/v22.2.1.2139-prestable.md b/docs/changelogs/v22.2.1.2139-prestable.md new file mode 100644 index 00000000000..187a2a26a66 --- /dev/null +++ b/docs/changelogs/v22.2.1.2139-prestable.md @@ -0,0 +1,208 @@ +### ClickHouse release v22.2.1.2139-prestable FIXME as compared to v22.1.1.2542-prestable + +#### New Feature +* (Not ready for production, put into experimental features) Add memory overcommit to `MemoryTracker`. Added `guaranteed` settings for memory limits which represent soft memory limits. In case when hard memory limit is reached, `MemoryTracker` tries to cancel the most overcommitted query. New setting `memory_usage_overcommit_max_wait_microseconds` specifies how long queries may wait another query to stop. Closes [#28375](https://github.com/ClickHouse/ClickHouse/issues/28375). [#31182](https://github.com/ClickHouse/ClickHouse/pull/31182) ([Dmitry Novik](https://github.com/novikd)). +* The setting allows a user to provide own deduplication semantic in MergeTree/ReplicatedMergeTree. If provided, it's used instead of data digest to generate block ID. So, for example, by providing a unique value for the setting in each INSERT statement, the user can avoid the same inserted data being deduplicated. This closes: [#7461](https://github.com/ClickHouse/ClickHouse/issues/7461). [#32304](https://github.com/ClickHouse/ClickHouse/pull/32304) ([Igor Nikonov](https://github.com/devcrafter)). +* Add support of DEFAULT keyword for INSERT statements. Closes [#6331](https://github.com/ClickHouse/ClickHouse/issues/6331). [#33141](https://github.com/ClickHouse/ClickHouse/pull/33141) ([Andrii Buriachevskyi](https://github.com/0over)). +* Add confidence intervals to ttests. [#33260](https://github.com/ClickHouse/ClickHouse/pull/33260) ([achimbab](https://github.com/achimbab)). +* Allow to create new files on insert for File/S3/HDFS engines. Allow to overwrite file in HDFS. Throw an exception in attempt to overwrite a file in S3 by default.
Throw an exception in attempt to append data to file in formats that have suffix. Closes [#31640](https://github.com/ClickHouse/ClickHouse/issues/31640) Closes [#31622](https://github.com/ClickHouse/ClickHouse/issues/31622) Closes [#23862](https://github.com/ClickHouse/ClickHouse/issues/23862) Closes [#15022](https://github.com/ClickHouse/ClickHouse/issues/15022) Closes [#16674](https://github.com/ClickHouse/ClickHouse/issues/16674). [#33302](https://github.com/ClickHouse/ClickHouse/pull/33302) ([Kruglov Pavel](https://github.com/Avogar)). +* Add `h3ToCenterChild` function. [#33313](https://github.com/ClickHouse/ClickHouse/pull/33313) ([Bharat Nallan](https://github.com/bharatnc)). +* Merge functions for text classification. See [#23271](https://github.com/ClickHouse/ClickHouse/issues/23271). [#33314](https://github.com/ClickHouse/ClickHouse/pull/33314) ([Nikolay Degterinsky](https://github.com/evillique)). +* Implemented meanZTest. [#33354](https://github.com/ClickHouse/ClickHouse/pull/33354) ([achimbab](https://github.com/achimbab)). +* - Add function bitSlice. [#33360](https://github.com/ClickHouse/ClickHouse/pull/33360) ([RogerYK](https://github.com/RogerYK)). +* Add new h3 miscellaneous functions: `edgeLengthKm`,`exactEdgeLengthKm`,`exactEdgeLengthM`,`exactEdgeLengthRads`,`numHexagons`. [#33621](https://github.com/ClickHouse/ClickHouse/pull/33621) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `DEGREES` and `RADIANS` functions. [#33769](https://github.com/ClickHouse/ClickHouse/pull/33769) ([Bharat Nallan](https://github.com/bharatnc)). +* Parameter `--host` can accept multiple hosts. In case of unavailability of one of them, the client will try to connect to the next one. [#33824](https://github.com/ClickHouse/ClickHouse/pull/33824) ([Filippov Denis](https://github.com/DF5HSE)). +* Detect format in clickhouse-local by file name. [#33829](https://github.com/ClickHouse/ClickHouse/pull/33829) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Add a new method `expire()` in PoolBase which is used to reallocate an invalid object in the pool. [#34076](https://github.com/ClickHouse/ClickHouse/pull/34076) ([lgbo](https://github.com/lgbo-ustc)). +* Add table function `format(format_name, data)`. [#34125](https://github.com/ClickHouse/ClickHouse/pull/34125) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow to create default table engine. [#34187](https://github.com/ClickHouse/ClickHouse/pull/34187) ([Ilya Yatsishin](https://github.com/qoega)). +* `EPHEMERAL` column specifier is added to `CREATE TABLE` query. Closes [#9436](https://github.com/ClickHouse/ClickHouse/issues/9436). [#34424](https://github.com/ClickHouse/ClickHouse/pull/34424) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### Performance Improvement +* For inserts and merges into S3, write files in parallel whenever possible. [#33291](https://github.com/ClickHouse/ClickHouse/pull/33291) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Improve RangeHashedDictionary performance if for key there are a lot of intervals. Fixes [#23821](https://github.com/ClickHouse/ClickHouse/issues/23821). [#33516](https://github.com/ClickHouse/ClickHouse/pull/33516) ([Maksim Kita](https://github.com/kitaisreal)). +* Add x86 avx512 support for memcmpSmall functions to accelerate memory comparison. It works only if you compile ClickHouse by yourself. [#33706](https://github.com/ClickHouse/ClickHouse/pull/33706) ([hanqf-git](https://github.com/hanqf-git)). +* Reworks and reintroduces the scalar cache to MV execution. [#33958](https://github.com/ClickHouse/ClickHouse/pull/33958) ([Raúl Marín](https://github.com/Algunenano)). +* Make ORDER BY tuple almost as fast as ORDER BY columns. We have special optimizations for multiple column ORDER BY: https://github.com/ClickHouse/ClickHouse/pull/10831 . It's beneficial to also apply to tuple columns. 
[#34060](https://github.com/ClickHouse/ClickHouse/pull/34060) ([Amos Bird](https://github.com/amosbird)). +* Minor improvement to potential hot-path in `ExecuteScalarSubqueriesMatcher::visit` , where `std::set` was constructed on every function invocation. [#34128](https://github.com/ClickHouse/ClickHouse/pull/34128) ([Federico Rodriguez](https://github.com/fedrod)). +* Slightly improve performance of `Regexp` format. [#34202](https://github.com/ClickHouse/ClickHouse/pull/34202) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Optimize quantilesExact{Low,High} to use nth_element instead of sort. [#34287](https://github.com/ClickHouse/ClickHouse/pull/34287) ([Daniel Kutenin](https://github.com/danlark1)). +* Improve performance of `LineAsString` format. This closes [#34303](https://github.com/ClickHouse/ClickHouse/issues/34303). [#34306](https://github.com/ClickHouse/ClickHouse/pull/34306) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up loading of data parts. It was not parallelized before: the setting `part_loading_threads` did not have effect. See [#4699](https://github.com/ClickHouse/ClickHouse/issues/4699). [#34310](https://github.com/ClickHouse/ClickHouse/pull/34310) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `_file` and `_path` virtual columns (in file-like table engines) are made `LowCardinality` - it will make queries for multiple files faster. Closes [#34300](https://github.com/ClickHouse/ClickHouse/issues/34300). [#34317](https://github.com/ClickHouse/ClickHouse/pull/34317) ([flynn](https://github.com/ucasfl)). +* Improve performance of `mapPopulateSeries` function. Closes [#33944](https://github.com/ClickHouse/ClickHouse/issues/33944). [#34318](https://github.com/ClickHouse/ClickHouse/pull/34318) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Use a vector to collect useless list iterators when doing a snapshot, and in latter clearOutdatedNodes, we can just traverse the vector, not the list, which is faster. [#34484](https://github.com/ClickHouse/ClickHouse/pull/34484) ([zhanglistar](https://github.com/zhanglistar)). +* Improve performance of insert into table functions URL, S3, File, HDFS. Closes [#34348](https://github.com/ClickHouse/ClickHouse/issues/34348). [#34510](https://github.com/ClickHouse/ClickHouse/pull/34510) ([Maksim Kita](https://github.com/kitaisreal)). +* According to https://github.com/eBay/NuRaft/issues/209, there is no need to lock the method. [#34523](https://github.com/ClickHouse/ClickHouse/pull/34523) ([zhanglistar](https://github.com/zhanglistar)). + +#### Improvement +* Now ReplicatedMergeTree can recover data when some of its disks are broken. [#13544](https://github.com/ClickHouse/ClickHouse/pull/13544) ([Amos Bird](https://github.com/amosbird)). +* Merge [#15765](https://github.com/ClickHouse/ClickHouse/issues/15765) (Dynamic reload of server TLS certificates on config reload) cc @johnskopis. [#31257](https://github.com/ClickHouse/ClickHouse/pull/31257) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Added `UUID` data type support for functions `hex`, `bin`. [#32170](https://github.com/ClickHouse/ClickHouse/pull/32170) ([Frank Chen](https://github.com/FrankChen021)). +* Support `optimize_read_in_order` if prefix of sorting key is already sorted. E.g. if we have sorting key `ORDER BY (a, b)` in table and query with `WHERE a = const ORDER BY b` clauses, now it will be applied reading in order of sorting key instead of full sort. [#32748](https://github.com/ClickHouse/ClickHouse/pull/32748) ([Anton Popov](https://github.com/CurtizJ)). +* Add new keeper setting `min_session_timeout_ms`. Now keeper server will determine client session timeout according to `min_session_timeout_ms` and `session_timeout_ms` settings.
[#33288](https://github.com/ClickHouse/ClickHouse/pull/33288) ([JackyWoo](https://github.com/JackyWoo)). +* Improve keeper performance and fix several memory leaks. [#33329](https://github.com/ClickHouse/ClickHouse/pull/33329) ([alesapin](https://github.com/alesapin)). +* Respect cgroup limits for CPU quota. [#33342](https://github.com/ClickHouse/ClickHouse/pull/33342) ([JaySon](https://github.com/JaySon-Huang)). +* Enable binary arithmetic (plus, minus, multiply, division, least, greatest) between Decimal and Float. [#33355](https://github.com/ClickHouse/ClickHouse/pull/33355) ([flynn](https://github.com/ucasfl)). +* Replace `_shard_num` via constants (from [#7624](https://github.com/ClickHouse/ClickHouse/issues/7624)) with `shardNum()` function (from [#27020](https://github.com/ClickHouse/ClickHouse/issues/27020)), to avoid possible issues (like those that had been found in [#16947](https://github.com/ClickHouse/ClickHouse/issues/16947)). [#33392](https://github.com/ClickHouse/ClickHouse/pull/33392) ([Azat Khuzhin](https://github.com/azat)). +* Support `SET`, `YEAR`, `TIME` and `GEOMETRY` data types in `MaterializedMySQL`. Fixes [#18091](https://github.com/ClickHouse/ClickHouse/issues/18091), [#21536](https://github.com/ClickHouse/ClickHouse/issues/21536), [#26361](https://github.com/ClickHouse/ClickHouse/issues/26361). [#33429](https://github.com/ClickHouse/ClickHouse/pull/33429) ([zzsmdfj](https://github.com/zzsmdfj)). +* add function addressToLineWithInlines. Close [#26211](https://github.com/ClickHouse/ClickHouse/issues/26211). [#33467](https://github.com/ClickHouse/ClickHouse/pull/33467) ([SuperDJY](https://github.com/cmsxbc)). +* Improvement for `fromUnixTimestamp64` family functions. They now accept any integer value that can be converted to `Int64`. This closes: [#14648](https://github.com/ClickHouse/ClickHouse/issues/14648). [#33505](https://github.com/ClickHouse/ClickHouse/pull/33505) ([Andrey Zvonov](https://github.com/zvonand)).
+* Functions `dictGet`, `dictHas` implicitly cast key argument to dictionary key structure, if they are different. [#33672](https://github.com/ClickHouse/ClickHouse/pull/33672) ([Maksim Kita](https://github.com/kitaisreal)). +* - Parse and store OpenTelemetry trace-id in big-endian order. [#33723](https://github.com/ClickHouse/ClickHouse/pull/33723) ([Frank Chen](https://github.com/FrankChen021)). +* Enable stream to table join in WindowView. [#33729](https://github.com/ClickHouse/ClickHouse/pull/33729) ([vxider](https://github.com/Vxider)). +* Create parent directories in DiskS3::restoreFileOperations method. [#33730](https://github.com/ClickHouse/ClickHouse/pull/33730) ([ianton-ru](https://github.com/ianton-ru)). +* Add some improvements and fixes for Bool data type. Fixes [#33244](https://github.com/ClickHouse/ClickHouse/issues/33244). [#33737](https://github.com/ClickHouse/ClickHouse/pull/33737) ([Kruglov Pavel](https://github.com/Avogar)). +* Added support for cast from `Map(Key, Value)` to `Array(Tuple(Key, Value))`. [#33794](https://github.com/ClickHouse/ClickHouse/pull/33794) ([Maksim Kita](https://github.com/kitaisreal)). +* Support explain create function query ``` sql :) explain ast create function mycast AS (n) -> cast(n as String); EXPLAIN AST CREATE FUNCTION mycast AS n -> CAST(n, 'String'). [#33819](https://github.com/ClickHouse/ClickHouse/pull/33819) ([李扬](https://github.com/taiyang-li)). +* Try every resolved ip address while getting S3 proxy. [#33862](https://github.com/ClickHouse/ClickHouse/pull/33862) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* `FlatDictionary` improve performance of dictionary data load. [#33871](https://github.com/ClickHouse/ClickHouse/pull/33871) ([Maksim Kita](https://github.com/kitaisreal)). +* fix disk using the same path, close [#29072](https://github.com/ClickHouse/ClickHouse/issues/29072). [#33905](https://github.com/ClickHouse/ClickHouse/pull/33905) ([zhongyuankai](https://github.com/zhongyuankai)). 
+* Dictionaries added support for DateTime64. [#33914](https://github.com/ClickHouse/ClickHouse/pull/33914) ([Maksim Kita](https://github.com/kitaisreal)). +* `FlatDictionary`, `HashedDictionary`, `HashedArrayDictionary` added support for creating with empty attributes, with support of read all keys, and `dictHas`. Fixes [#33820](https://github.com/ClickHouse/ClickHouse/issues/33820). [#33918](https://github.com/ClickHouse/ClickHouse/pull/33918) ([Maksim Kita](https://github.com/kitaisreal)). +* `RangeHashedDictionary` improvements. Improve performance of load time if there are multiple attributes. Allow to create without attributes. Added option to specify strategy when intervals `start` and `end` have `Nullable` type `convert_null_range_bound_to_open` by default is `true`. Closes [#29791](https://github.com/ClickHouse/ClickHouse/issues/29791). Allow to specify `Float`, `Decimal`, `DateTime64`, `Int128`, `Int256`, `UInt128`, `UInt256` as range types. `RangeHashedDictionary` added support for range values that extend `Int64` type. Closes [#28322](https://github.com/ClickHouse/ClickHouse/issues/28322). Added option `range_lookup_strategy` to specify range lookup type `min`, `max` by default is `min` . Closes [#21647](https://github.com/ClickHouse/ClickHouse/issues/21647). Fixed allocated bytes calculations. Fixed type name in `system.dictionaries` in case of `ComplexKeyHashedDictionary`. [#33927](https://github.com/ClickHouse/ClickHouse/pull/33927) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix getauxval() in glibc-compatibility, this should fix vsyscalls after `setenv` (i.e. timezone is set in config), and LSan (and also fix some leaks that had been found by LSan). [#33957](https://github.com/ClickHouse/ClickHouse/pull/33957) ([Azat Khuzhin](https://github.com/azat)). +* Detect format and schema from stdin in clickhouse-local. [#33960](https://github.com/ClickHouse/ClickHouse/pull/33960) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Fixed UTF-8 string case-insensitive search when lowercase and uppercase characters are represented by different number of bytes. Example is `ẞ` and `ß`. This closes [#7334](https://github.com/ClickHouse/ClickHouse/issues/7334). [#33992](https://github.com/ClickHouse/ClickHouse/pull/33992) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Fix memory accounting for queries that use < `max_untracked_memory`. [#34001](https://github.com/ClickHouse/ClickHouse/pull/34001) ([Azat Khuzhin](https://github.com/azat)). +* Supports all types of SYSTEM query ON CLUSTER clause. [#34005](https://github.com/ClickHouse/ClickHouse/pull/34005) ([小路](https://github.com/nicelulu)). +* Add schema inference for values() table function. Closes [#33811](https://github.com/ClickHouse/ClickHouse/issues/33811). [#34017](https://github.com/ClickHouse/ClickHouse/pull/34017) ([Kruglov Pavel](https://github.com/Avogar)). +* Tracing context is now propagated from GRPC client metadata. [#34064](https://github.com/ClickHouse/ClickHouse/pull/34064) ([andremarianiello](https://github.com/andremarianiello)). +* Add UUID support in MsgPack input/output format. [#34065](https://github.com/ClickHouse/ClickHouse/pull/34065) ([Kruglov Pavel](https://github.com/Avogar)). +* Improving the experience of multiple line editing for clickhouse-client. This is a follow-up of https://github.com/ClickHouse/ClickHouse/pull/31123. [#34114](https://github.com/ClickHouse/ClickHouse/pull/34114) ([Amos Bird](https://github.com/amosbird)). +* Maxsplit argument for splitByChar. close [#34081](https://github.com/ClickHouse/ClickHouse/issues/34081). [#34140](https://github.com/ClickHouse/ClickHouse/pull/34140) ([李扬](https://github.com/taiyang-li)). +* Allow to parse dictionary `PRIMARY KEY` as `PRIMARY KEY (id, value)`, previously supported only `PRIMARY KEY id, value`. Closes [#34135](https://github.com/ClickHouse/ClickHouse/issues/34135).
[#34141](https://github.com/ClickHouse/ClickHouse/pull/34141) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow carriage return in the middle of the line while parsing by `Regexp` format. This closes [#34200](https://github.com/ClickHouse/ClickHouse/issues/34200). [#34205](https://github.com/ClickHouse/ClickHouse/pull/34205) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Recognize `YYYYMMDD-hhmmss` format in `parseDateTimeBestEffort` function. This closes [#34206](https://github.com/ClickHouse/ClickHouse/issues/34206). [#34208](https://github.com/ClickHouse/ClickHouse/pull/34208) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ability to compose PostgreSQL-style cast operator `::` with `ArrayElement` and `TupleElement`. [#34229](https://github.com/ClickHouse/ClickHouse/pull/34229) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added `#!` and `# ` as a recognised start of a single line comment. Reference to task [#34138](https://github.com/ClickHouse/ClickHouse/issues/34138). [#34230](https://github.com/ClickHouse/ClickHouse/pull/34230) ([Aaron Katz](https://github.com/aaronstephenkatz)). +* Change severity of the "Cancelled merging parts" message in logs, because it's not an error. This closes [#34148](https://github.com/ClickHouse/ClickHouse/issues/34148). [#34232](https://github.com/ClickHouse/ClickHouse/pull/34232) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Apply data skipping indexes for queries with FINAL may produce incorrect result. Disable data skipping indexes by default for queries with FINAL (introduce new `use_skip_indexes_if_final` setting and disable it by default). [#34243](https://github.com/ClickHouse/ClickHouse/pull/34243) ([Azat Khuzhin](https://github.com/azat)). +* Support asynchronous inserts in `clickhouse-client` for queries with inlined data. [#34267](https://github.com/ClickHouse/ClickHouse/pull/34267) ([Anton Popov](https://github.com/CurtizJ)). 
+* Cancel merges before acquiring table lock for `TRUNCATE` query to avoid `DEADLOCK_AVOIDED` error in some cases. Fixes [#34302](https://github.com/ClickHouse/ClickHouse/issues/34302). [#34304](https://github.com/ClickHouse/ClickHouse/pull/34304) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Some servers expect a User-Agent header in their HTTP requests. A `User-Agent` header entry has been added to HTTP requests of the form: User-Agent: ClickHouse/VERSION_STRING. [#34330](https://github.com/ClickHouse/ClickHouse/pull/34330) ([Saad Ur Rahman](https://github.com/surahman)). +* REGEXP_MATCHES and REGEXP_REPLACE function aliases for compatibility with PostgreSQL. Close [#30885](https://github.com/ClickHouse/ClickHouse/issues/30885). [#34334](https://github.com/ClickHouse/ClickHouse/pull/34334) ([李扬](https://github.com/taiyang-li)). +* Better handle pre-inputs before client start. This is for [#34308](https://github.com/ClickHouse/ClickHouse/issues/34308) . [#34336](https://github.com/ClickHouse/ClickHouse/pull/34336) ([Amos Bird](https://github.com/amosbird)). +* Add options `max_query_size` and `max_parser_depth` for clickhouse-format. Closes [#30528](https://github.com/ClickHouse/ClickHouse/issues/30528). [#34349](https://github.com/ClickHouse/ClickHouse/pull/34349) ([李扬](https://github.com/taiyang-li)). +* Default input and output formats that can be overridden by --input-format and --output-format. Close [#30631](https://github.com/ClickHouse/ClickHouse/issues/30631). [#34352](https://github.com/ClickHouse/ClickHouse/pull/34352) ([李扬](https://github.com/taiyang-li)). +* Allow to skip not found urls for globs when using URL storage / table function. Also closes [#34359](https://github.com/ClickHouse/ClickHouse/issues/34359). [#34392](https://github.com/ClickHouse/ClickHouse/pull/34392) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add two new settings: `s3_upload_part_size_multiply_factor` and `s3_upload_part_size_multiply_parts_count_threshold`. Now each time `s3_upload_part_size_multiply_parts_count_threshold` uploaded to S3 from a single query `s3_min_upload_part_size` multiplied by `s3_upload_part_size_multiply_factor`. Fixes [#34244](https://github.com/ClickHouse/ClickHouse/issues/34244). [#34422](https://github.com/ClickHouse/ClickHouse/pull/34422) ([alesapin](https://github.com/alesapin)). +* Allow `allow_experimental_projection_optimization` by default. [#34456](https://github.com/ClickHouse/ClickHouse/pull/34456) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Privileges CREATE/ALTER/DROP ROW POLICY now can be granted on a table or on `database.*` as well as globally `*.*`. [#34489](https://github.com/ClickHouse/ClickHouse/pull/34489) ([Vitaly Baranov](https://github.com/vitlibar)). +* Refactor client fault tolerant connection (https://github.com/ClickHouse/ClickHouse/pull/33824#issuecomment-1033690860). The new way to use it: ```bash clickhouse-client ... --host host1 --host host2 --port port2 --host host3 --port port --host host4 ```. [#34490](https://github.com/ClickHouse/ClickHouse/pull/34490) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve schema inference in clickhouse-local. Allow to write just `clickhouse-local -q "select * from table" < data.format`. [#34495](https://github.com/ClickHouse/ClickHouse/pull/34495) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `.jsonl` extension for JSONEachRow format. [#34496](https://github.com/ClickHouse/ClickHouse/pull/34496) ([Kruglov Pavel](https://github.com/Avogar)). +* Send ProfileEvents statistics in case of INSERT SELECT query. [#34498](https://github.com/ClickHouse/ClickHouse/pull/34498) ([Dmitry Novik](https://github.com/novikd)). +* Added sending of the output format back to client like it's done in HTTP protocol as suggested in [#34362](https://github.com/ClickHouse/ClickHouse/issues/34362). 
Closes [#34362](https://github.com/ClickHouse/ClickHouse/issues/34362). [#34499](https://github.com/ClickHouse/ClickHouse/pull/34499) ([Vitaly Baranov](https://github.com/vitlibar)). +* Allow to write `s3(url, access_key_id, secret_access_key)`. [#34503](https://github.com/ClickHouse/ClickHouse/pull/34503) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `IF EXISTS` clause for `TTL expr TO [DISK|VOLUME] [IF EXISTS] 'xxx'` feature. Parts will be moved to disk or volume only if it exists on replica, so `MOVE TTL` rules will be able to behave differently on replicas according to the existing storage policies. Resolves [#34455](https://github.com/ClickHouse/ClickHouse/issues/34455). [#34504](https://github.com/ClickHouse/ClickHouse/pull/34504) ([Anton Popov](https://github.com/CurtizJ)). +* Little improvement no need to clone log entry. [#34587](https://github.com/ClickHouse/ClickHouse/pull/34587) ([zhanglistar](https://github.com/zhanglistar)). +* Slightly improve performance in case of filtering by sparse columns (which can be enabled by setting `ratio_of_defaults_for_sparse_serialization` in `MergeTree` tables). [#34601](https://github.com/ClickHouse/ClickHouse/pull/34601) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix +* This PR allows using multiple LDAP storages in the same list of user directories. It worked earlier but was broken because LDAP tests are disabled (they are part of the testflows tests). [#33574](https://github.com/ClickHouse/ClickHouse/pull/33574) ([Vitaly Baranov](https://github.com/vitlibar)). +* This PR fixes [#18206](https://github.com/ClickHouse/ClickHouse/issues/18206). [#33977](https://github.com/ClickHouse/ClickHouse/pull/33977) ([Vitaly Baranov](https://github.com/vitlibar)). +* This PR fixes [#19429](https://github.com/ClickHouse/ClickHouse/issues/19429). [#34225](https://github.com/ClickHouse/ClickHouse/pull/34225) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix inserting to temporary tables via gRPC. 
This PR fixes [#34347](https://github.com/ClickHouse/ClickHouse/issues/34347), issue `#2`. [#34364](https://github.com/ClickHouse/ClickHouse/pull/34364) ([Vitaly Baranov](https://github.com/vitlibar)). +* add HashMethodSingleLowCardinalityColumn::findKey, avoid crash. [#34506](https://github.com/ClickHouse/ClickHouse/pull/34506) ([DR](https://github.com/freedomDR)). + +#### Build/Testing/Packaging Improvement +* Add action for published releases. [#32218](https://github.com/ClickHouse/ClickHouse/pull/32218) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove unbundled support. [#33690](https://github.com/ClickHouse/ClickHouse/pull/33690) ([Azat Khuzhin](https://github.com/azat)). +* Some improvement over current build system. [#33695](https://github.com/ClickHouse/ClickHouse/pull/33695) ([Amos Bird](https://github.com/amosbird)). +* Removed "Yandex ClickHouse" terms from descriptions. Change to default mirrors for packages. [#33745](https://github.com/ClickHouse/ClickHouse/pull/33745) ([Ilya Yatsishin](https://github.com/qoega)). +* - Fix unconditional `--build-arg FROM_TAG=` docker argument - Add some tests for docker tests/ci. [#33751](https://github.com/ClickHouse/ClickHouse/pull/33751) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Disable dh_update_autotools_config that updates config.guess/config.sub in sources directory on build. It will be more deterministic in terms of different build hosts used as config.guess is provided with sources. [#33752](https://github.com/ClickHouse/ClickHouse/pull/33752) ([Ilya Yatsishin](https://github.com/qoega)). +* clickhouse-test.deb is not used in stateless CI checks. [#33948](https://github.com/ClickHouse/ClickHouse/pull/33948) ([Ilya Yatsishin](https://github.com/qoega)). +* - Add on-demand style-checker-aarch64 hosts - Run dockerpush CI jobs there. [#33954](https://github.com/ClickHouse/ClickHouse/pull/33954) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Add back the missing `-Werror` flag globally. This fixes https://github.com/ClickHouse/ClickHouse/pull/33940#issuecomment-1020466537. [#33970](https://github.com/ClickHouse/ClickHouse/pull/33970) ([Amos Bird](https://github.com/amosbird)). +* Separate base parts out of SystemLog. Common code can use system log without fully linking to Interpreters. This helps with errors like https://s3.amazonaws.com/clickhouse-builds/33970/49b229f9c781854861254350d3407f209fb99dfd/binary_splitted/build_log.log. [#33978](https://github.com/ClickHouse/ClickHouse/pull/33978) ([Amos Bird](https://github.com/amosbird)). +* Fix broken dependencies tree building, improve tests. [#33983](https://github.com/ClickHouse/ClickHouse/pull/33983) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Add get_with_retries to a download helper - Use it in PRInfo class - Replace `labels_from_api` by `pr_event_from_api` - Use it in description check to have always actual body and labels. [#34012](https://github.com/ClickHouse/ClickHouse/pull/34012) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Fix style check for tags_stable.yml - Run StyleCheck always in PR - Mark `Run Check` as failed only for some cases. [#34283](https://github.com/ClickHouse/ClickHouse/pull/34283) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add verbosity to a style check. [#34289](https://github.com/ClickHouse/ClickHouse/pull/34289) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix previously wrong OK_SKIP_LABELS in run_check.py. [#34340](https://github.com/ClickHouse/ClickHouse/pull/34340) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Migrate docker images from Moscow timezone to UTC - Update ubuntu version for performance comparison from 18.04 to 20.04. [#34373](https://github.com/ClickHouse/ClickHouse/pull/34373) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Ensure that tests don't depend on the result of non-stable sorting of equal elements.
Added equal items ranges randomization in debug after sort to prevent issues when we rely on equal items sort order. [#34393](https://github.com/ClickHouse/ClickHouse/pull/34393) ([Maksim Kita](https://github.com/kitaisreal)). +* - Rebuild docker images on a daily base - Add `--all` flag to rebuild all images - Add `only_amd64` parameter for some images - Revert all workarounds for `only_amd64` images - Fix all broken images. [#34492](https://github.com/ClickHouse/ClickHouse/pull/34492) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix broken Hadoop tests after updated paths in an image. [#34556](https://github.com/ClickHouse/ClickHouse/pull/34556) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* I test compile ClickHouse without HDFS but failed, because the code in DiskHDFS doesn`t judge whether use HDFS. [#34573](https://github.com/ClickHouse/ClickHouse/pull/34573) ([zxealous](https://github.com/zxealous)). +* Set timeout 40 minutes for fast tests. [#34614](https://github.com/ClickHouse/ClickHouse/pull/34614) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Reverts ClickHouse/ClickHouse[#34614](https://github.com/ClickHouse/ClickHouse/issues/34614). [#34622](https://github.com/ClickHouse/ClickHouse/pull/34622) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Rework version_helper, make it executable - Reimplement StorageSystemContributors.sh in version_helper - Create a release script. [#34641](https://github.com/ClickHouse/ClickHouse/pull/34641) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Fix path in workflows/release.yml - To be backported to branch 22.1. [#34646](https://github.com/ClickHouse/ClickHouse/pull/34646) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fix lz4 compression for output. Closes [#31421](https://github.com/ClickHouse/ClickHouse/issues/31421). 
[#31862](https://github.com/ClickHouse/ClickHouse/pull/31862) ([Kruglov Pavel](https://github.com/Avogar)). +* Create a function escapeForLDAPFilter and use it to escape characters '(' and ')' in a final_user_dn variable. [#33401](https://github.com/ClickHouse/ClickHouse/pull/33401) ([IlyaTsoi](https://github.com/IlyaTsoi)). +* TODO. [#33492](https://github.com/ClickHouse/ClickHouse/pull/33492) ([huzhichengdd](https://github.com/huzhichengdd)). +* Fix error `Bad cast from type ... to DB::DataTypeArray` which may happen when table has `Nested` column with dots in name, and default value is generated for it (e.g. during insert, when column is not listed). Continuation of [#28762](https://github.com/ClickHouse/ClickHouse/issues/28762). [#33588](https://github.com/ClickHouse/ClickHouse/pull/33588) ([Alexey Pavlenko](https://github.com/alexeypavlenko)). +* Fix `Chunk should have AggregatedChunkInfo in GroupingAggregatedTransform` (in case of `optimize_aggregation_in_order=1`). [#33637](https://github.com/ClickHouse/ClickHouse/pull/33637) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug in zero copy replication which lead to data duplication in case of TTL move. Fixes [#33643](https://github.com/ClickHouse/ClickHouse/issues/33643). [#33642](https://github.com/ClickHouse/ClickHouse/pull/33642) ([alesapin](https://github.com/alesapin)). +* Allow some queries with sorting, LIMIT BY, ARRAY JOIN and lambda functions. This closes [#7462](https://github.com/ClickHouse/ClickHouse/issues/7462). [#33675](https://github.com/ClickHouse/ClickHouse/pull/33675) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Correctly determine current database if `CREATE TEMPORARY TABLE AS SELECT` is queried inside a named HTTP session. This is a very rare use case. This closes [#8340](https://github.com/ClickHouse/ClickHouse/issues/8340). [#33676](https://github.com/ClickHouse/ClickHouse/pull/33676) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix mutation when table contains projections. This fixes [#33010](https://github.com/ClickHouse/ClickHouse/issues/33010) . This fixes [#33275](https://github.com/ClickHouse/ClickHouse/issues/33275) . [#33679](https://github.com/ClickHouse/ClickHouse/pull/33679) ([Amos Bird](https://github.com/amosbird)). +* Throw exception when storage hdfs list directory failed. [#33724](https://github.com/ClickHouse/ClickHouse/pull/33724) ([LiuNeng](https://github.com/liuneng1994)). +* Fix tiny race between count() and INSERT/merges/... in MergeTree (it is possible to return incorrect number of rows for SELECT with optimize_trivial_count_query). [#33753](https://github.com/ClickHouse/ClickHouse/pull/33753) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug of check table when creating data part with wide format and projection. [#33774](https://github.com/ClickHouse/ClickHouse/pull/33774) ([李扬](https://github.com/taiyang-li)). +* Fix parsing query INSERT INTO ... VALUES SETTINGS ... (...), ... [#33776](https://github.com/ClickHouse/ClickHouse/pull/33776) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bug in client that led to 'Connection reset by peer' in server. Closes [#33309](https://github.com/ClickHouse/ClickHouse/issues/33309). [#33790](https://github.com/ClickHouse/ClickHouse/pull/33790) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix usage of external dictionaries with `Redis` source and large number of keys. [#33804](https://github.com/ClickHouse/ClickHouse/pull/33804) ([Anton Popov](https://github.com/CurtizJ)). +* Fix schema inference for JSONEachRow and JSONCompactEachRow. [#33830](https://github.com/ClickHouse/ClickHouse/pull/33830) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix KeyCondition with no common types available. [#33833](https://github.com/ClickHouse/ClickHouse/pull/33833) ([Amos Bird](https://github.com/amosbird)). +* Fix memory leak in `clickhouse-keeper` in case of compression is used (default). 
[#33840](https://github.com/ClickHouse/ClickHouse/pull/33840) ([Azat Khuzhin](https://github.com/azat)). +* Fixed `replica is not readonly` logical error on `SYSTEM RESTORE REPLICA` query when replica is actually readonly. Fixes [#33806](https://github.com/ClickHouse/ClickHouse/issues/33806). [#33847](https://github.com/ClickHouse/ClickHouse/pull/33847) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix usage of sparse columns (which can be enabled by experimental setting `ratio_of_defaults_for_sparse_serialization`). [#33849](https://github.com/ClickHouse/ClickHouse/pull/33849) ([Anton Popov](https://github.com/CurtizJ)). +* Fix crash if sql user defined function is created with lambda with non identifier arguments. Closes [#33866](https://github.com/ClickHouse/ClickHouse/issues/33866). [#33868](https://github.com/ClickHouse/ClickHouse/pull/33868) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix potential race condition when doing remote disk read. cc @Jokser. [#33912](https://github.com/ClickHouse/ClickHouse/pull/33912) ([Amos Bird](https://github.com/amosbird)). +* Aggregate function combinator `-If` did not correctly process `Nullable` filter argument. This closes [#27073](https://github.com/ClickHouse/ClickHouse/issues/27073). [#33920](https://github.com/ClickHouse/ClickHouse/pull/33920) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix parsing ZK metadata: now metadata from zookeeper compared with local metadata in canonical form. [#33933](https://github.com/ClickHouse/ClickHouse/pull/33933) ([sunny](https://github.com/sunny19930321)). +* Fix usage of functions `array` and `tuple` with literal arguments in distributed queries. Previously it could lead to `Not found columns` exception. [#33938](https://github.com/ClickHouse/ClickHouse/pull/33938) ([Anton Popov](https://github.com/CurtizJ)). +* Fix crash while reading of nested tuples. Fixes [#33838](https://github.com/ClickHouse/ClickHouse/issues/33838). 
[#33956](https://github.com/ClickHouse/ClickHouse/pull/33956) ([Anton Popov](https://github.com/CurtizJ)). +* Fix parsing IPv6 from query parameter and fix IPv6 to string conversion. Closes [#33928](https://github.com/ClickHouse/ClickHouse/issues/33928). [#33971](https://github.com/ClickHouse/ClickHouse/pull/33971) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix segfault while parsing ORC file with corrupted footer. Closes [#33797](https://github.com/ClickHouse/ClickHouse/issues/33797). [#33984](https://github.com/ClickHouse/ClickHouse/pull/33984) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bug which lead to inability for server to start when both replicated access storage and keeper are used. Introduced two settings for keeper socket timeout instead of settings from default user: `keeper_server.socket_receive_timeout_sec` and `keeper_server.socket_send_timeout_sec`. Fixes [#33973](https://github.com/ClickHouse/ClickHouse/issues/33973). [#33988](https://github.com/ClickHouse/ClickHouse/pull/33988) ([alesapin](https://github.com/alesapin)). +* - Fixes `parallel_view_processing=0` not working when inserting into a table using `VALUES`. - Fixes `view_duration_ms` in the `query_views_log` not being set correctly for materialized views. [#34067](https://github.com/ClickHouse/ClickHouse/pull/34067) ([Raúl Marín](https://github.com/Algunenano)). +* Fix asynchronous inserts with `Native` format. [#34068](https://github.com/ClickHouse/ClickHouse/pull/34068) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed minor race condition that might cause "intersecting parts" error in extremely rare cases after ZooKeeper connection loss. [#34096](https://github.com/ClickHouse/ClickHouse/pull/34096) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix possible data race in StorageFile that was introduced in https://github.com/ClickHouse/ClickHouse/pull/33960. Closes [#34111](https://github.com/ClickHouse/ClickHouse/issues/34111). 
[#34113](https://github.com/ClickHouse/ClickHouse/pull/34113) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix inserts to distributed tables in case of change of native protocol. The last change was in version 22.1, so there may be some failures of inserts to distributed tables after upgrade to that version. [#34132](https://github.com/ClickHouse/ClickHouse/pull/34132) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bug which can rarely lead to error "Cannot read all data" while reading LowCardinality columns of MergeTree table engines family which stores data on remote file system like S3. [#34139](https://github.com/ClickHouse/ClickHouse/pull/34139) ([alesapin](https://github.com/alesapin)). +* Fix rare and benign race condition in `HDFS`, `S3` and `URL` storage engines which can lead to additional connections. [#34172](https://github.com/ClickHouse/ClickHouse/pull/34172) ([alesapin](https://github.com/alesapin)). +* Fix schema inference for table function s3. [#34186](https://github.com/ClickHouse/ClickHouse/pull/34186) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix metric `Query`, which shows number of executing queries. In last several releases it was always 0. [#34224](https://github.com/ClickHouse/ClickHouse/pull/34224) ([Anton Popov](https://github.com/CurtizJ)). +* Fix reading of subcolumns with dots in their names. In particular fixed reading of `Nested` columns, if their element names contain dots (e.g ```Nested(`keys.name` String, `keys.id` UInt64, values UInt64)```). [#34228](https://github.com/ClickHouse/ClickHouse/pull/34228) ([Anton Popov](https://github.com/CurtizJ)). +* Fix memory leak in case of some Exception during query processing with `optimize_aggregation_in_order=1`. [#34234](https://github.com/ClickHouse/ClickHouse/pull/34234) ([Azat Khuzhin](https://github.com/azat)).
+* Fix current_user/current_address for interserver mode (Before this patch current_user/current_address will be preserved from the previous query). [#34263](https://github.com/ClickHouse/ClickHouse/pull/34263) ([Azat Khuzhin](https://github.com/azat)). +* Fix progress bar width. It was incorrectly rounded to integer number of characters. [#34275](https://github.com/ClickHouse/ClickHouse/pull/34275) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed a couple of extremely rare race conditions that might lead to broken state of replication queue and "intersecting parts" error. [#34297](https://github.com/ClickHouse/ClickHouse/pull/34297) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix various issues when projection is enabled by default. Each issue is described in separate commit. This is for [#33678](https://github.com/ClickHouse/ClickHouse/issues/33678) . This fixes [#34273](https://github.com/ClickHouse/ClickHouse/issues/34273). [#34305](https://github.com/ClickHouse/ClickHouse/pull/34305) ([Amos Bird](https://github.com/amosbird)). +* Try to fix rare bug while reading of empty arrays, which could lead to `Data compressed with different methods` error. [#34327](https://github.com/ClickHouse/ClickHouse/pull/34327) ([Anton Popov](https://github.com/CurtizJ)). +* Fix wrong engine syntax in result of `SHOW CREATE DATABASE` query for databases with engine `Memory`. This closes [#34335](https://github.com/ClickHouse/ClickHouse/issues/34335). [#34345](https://github.com/ClickHouse/ClickHouse/pull/34345) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* For SQLUserDefinedFunctions change privilege level from DATABASE to GLOBAL. Closes [#34281](https://github.com/ClickHouse/ClickHouse/issues/34281). [#34404](https://github.com/ClickHouse/ClickHouse/pull/34404) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix segfault in schema inference from url. Closes [#34147](https://github.com/ClickHouse/ClickHouse/issues/34147). 
[#34405](https://github.com/ClickHouse/ClickHouse/pull/34405) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible error 'Cannot convert column Function to mask' in short circuit function evaluation. Closes [#34171](https://github.com/ClickHouse/ClickHouse/issues/34171). [#34415](https://github.com/ClickHouse/ClickHouse/pull/34415) ([Kruglov Pavel](https://github.com/Avogar)). +* Add missing lock for storage. Fixes possible race with table deletion. [#34416](https://github.com/ClickHouse/ClickHouse/pull/34416) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible error 'file_size: Operation not supported'. [#34479](https://github.com/ClickHouse/ClickHouse/pull/34479) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix compression support in URL engine. [#34524](https://github.com/ClickHouse/ClickHouse/pull/34524) ([Frank Chen](https://github.com/FrankChen021)). +* Fix comparison between integers and floats in index analysis. Previously it could lead to skipping some granules for reading by mistake. Fixes [#34493](https://github.com/ClickHouse/ClickHouse/issues/34493). [#34528](https://github.com/ClickHouse/ClickHouse/pull/34528) ([Anton Popov](https://github.com/CurtizJ)). +* Fix exception `Chunk should have AggregatedChunkInfo in MergingAggregatedTransform` (in case of `optimize_aggregation_in_order=1` and `distributed_aggregation_memory_efficient=0`). Fixes [#34526](https://github.com/ClickHouse/ClickHouse/issues/34526). [#34532](https://github.com/ClickHouse/ClickHouse/pull/34532) ([Anton Popov](https://github.com/CurtizJ)). +* In case of cancelation S3 and HDFS canceled only current reader, but continued to execute the initial query. Fixes [#34301](https://github.com/ClickHouse/ClickHouse/issues/34301) Relates to [#34397](https://github.com/ClickHouse/ClickHouse/issues/34397). [#34539](https://github.com/ClickHouse/ClickHouse/pull/34539) ([Dmitry Novik](https://github.com/novikd)). 
+* Fix bug of round/roundBankers, close [#33267](https://github.com/ClickHouse/ClickHouse/issues/33267). [#34562](https://github.com/ClickHouse/ClickHouse/pull/34562) ([李扬](https://github.com/taiyang-li)). +* Fixed the assertion in case of using `allow_experimental_parallel_reading_from_replicas` with `max_parallel_replicas` equals to 1. This fixes [#34525](https://github.com/ClickHouse/ClickHouse/issues/34525). [#34613](https://github.com/ClickHouse/ClickHouse/pull/34613) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* - Add Debug workflow to get variables for all actions on demand - Fix lack of pr_info.number for some edge case. [#34644](https://github.com/ClickHouse/ClickHouse/pull/34644) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Additionally check remote_fs_execute_merges_on_single_replica_time_threshold inside ReplicatedMergeTreeQueue"'. [#34201](https://github.com/ClickHouse/ClickHouse/pull/34201) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add func tests run with s3"'. [#34211](https://github.com/ClickHouse/ClickHouse/pull/34211) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add pool to WriteBufferFromS3"'. [#34212](https://github.com/ClickHouse/ClickHouse/pull/34212) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Add support agreement page and snippets.'. [#34512](https://github.com/ClickHouse/ClickHouse/pull/34512) ([Tom Risse](https://github.com/flickerbox-tom)). +* NO CL ENTRY: 'Add Gigasheet to adopters'. [#34589](https://github.com/ClickHouse/ClickHouse/pull/34589) ([Brian Hunter](https://github.com/bjhunter)). + +#### NO CL CATEGORY + +* Reverting to previous docker images, will take a closer look at failing tests from [#34373](https://github.com/ClickHouse/ClickHouse/issues/34373). [#34413](https://github.com/ClickHouse/ClickHouse/pull/34413) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v22.2.2.1-stable.md b/docs/changelogs/v22.2.2.1-stable.md new file mode 100644 index 00000000000..c9158290753 --- /dev/null +++ b/docs/changelogs/v22.2.2.1-stable.md @@ -0,0 +1,2 @@ +### ClickHouse release v22.2.2.1-stable FIXME as compared to v22.2.1.2139-prestable + diff --git a/docs/changelogs/v22.2.3.5-stable.md b/docs/changelogs/v22.2.3.5-stable.md new file mode 100644 index 00000000000..90c0d22d570 --- /dev/null +++ b/docs/changelogs/v22.2.3.5-stable.md @@ -0,0 +1,6 @@ +### ClickHouse release v22.2.3.5-stable FIXME as compared to v22.2.2.1-stable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#34848](https://github.com/ClickHouse/ClickHouse/issues/34848): Fix possible failures in S2 functions when queries contain const columns. [#34745](https://github.com/ClickHouse/ClickHouse/pull/34745) ([Bharat Nallan](https://github.com/bharatnc)). + diff --git a/docs/changelogs/v22.3.1.1262-prestable.md b/docs/changelogs/v22.3.1.1262-prestable.md new file mode 100644 index 00000000000..f47afd67021 --- /dev/null +++ b/docs/changelogs/v22.3.1.1262-prestable.md @@ -0,0 +1,146 @@ +### ClickHouse release v22.3.1.1262-prestable FIXME as compared to v22.2.1.2139-prestable + +#### Backward Incompatible Change +* Improvement the toDatetime function overflows. When the date string is very large, it will be converted to 1970. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)). +* Make arrayCompact behave as other higher-order functions: perform compaction not of lambda function results but on original array. If you using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping arrayCompact arguments into arrayMap. 
Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)). + +#### New Feature +* New data type `Object()`, which supports storing of semi-structured data (for now JSON only). Data is written to such types as string. Then all paths are extracted according to format of semi-structured data and written as separate columns in most optimal types, that can store all their values. Those columns can be queried by names that match paths in source data. E.g `data.key1.key2` or with cast operator `data.key1.key2::Int64`. [#23932](https://github.com/ClickHouse/ClickHouse/pull/23932) ([Anton Popov](https://github.com/CurtizJ)). +* Support authentication of users connected via SSL by their X.509 certificate. [#31484](https://github.com/ClickHouse/ClickHouse/pull/31484) ([eungenue](https://github.com/eungenue)). +* related to issue: [#30715](https://github.com/ClickHouse/ClickHouse/issues/30715). Add three functions for map data type: 1. mapReplace(map1, map2) - replaces values for keys in map1 with the values of the corresponding keys in map2; adds keys from map2 that don't exist in map1. 2. mapFilter 3. mapMap mapFilter and mapMap are higher order functions , accept two arguments, first argument is a lambda function with k, v pair , the second argument is a map type column. [#33698](https://github.com/ClickHouse/ClickHouse/pull/33698) ([hexiaoting](https://github.com/hexiaoting)). +* Add local cache for disk s3. Closes [#28961](https://github.com/ClickHouse/ClickHouse/issues/28961). [#33717](https://github.com/ClickHouse/ClickHouse/pull/33717) ([Kseniia Sumarokova](https://github.com/kssenii)). +* - Add startsWith & endsWith function for arrays, closes [#33982](https://github.com/ClickHouse/ClickHouse/issues/33982). 
[#34368](https://github.com/ClickHouse/ClickHouse/pull/34368) ([usurai](https://github.com/usurai)). +* Implement DateTime64 transform from and to arrow column, which closes [#8280](https://github.com/ClickHouse/ClickHouse/issues/8280) and closes [#28574](https://github.com/ClickHouse/ClickHouse/issues/28574). [#34561](https://github.com/ClickHouse/ClickHouse/pull/34561) ([李扬](https://github.com/taiyang-li)). +* Add cpu/mem metric for clickhouse-local. Close [#34545](https://github.com/ClickHouse/ClickHouse/issues/34545). [#34605](https://github.com/ClickHouse/ClickHouse/pull/34605) ([李扬](https://github.com/taiyang-li)). +* Support schema inference for inserting into table functions file/hdfs/s3/url. [#34732](https://github.com/ClickHouse/ClickHouse/pull/34732) ([Kruglov Pavel](https://github.com/Avogar)). +* A new settings called is added in server configuration which on/off insecure AUTH_TYPE plaintext-password and no_password. By default the property is set to true which means authType Plaintext_password & NO_password is allowed. [#34738](https://github.com/ClickHouse/ClickHouse/pull/34738) ([Heena Bansal](https://github.com/HeenaBansal2009)). +* Add new table function `hive`, usage as follow ``` hive('', '', '', '', '') ``` for example ``` SELECT * FROM hive('thrift://hivetest:9083', 'test', 'demo', '`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)', 'day') ```. [#34946](https://github.com/ClickHouse/ClickHouse/pull/34946) ([lgbo](https://github.com/lgbo-ustc)). +* - When use clickhouse-client logining, If user and password is not specified in command line or configuration file, get them from `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables. Close [#34538](https://github.com/ClickHouse/ClickHouse/issues/34538). [#34947](https://github.com/ClickHouse/ClickHouse/pull/34947) ([DR](https://github.com/freedomDR)). +* Added date_time_input_format = 'best_effort_us'. 
Closes [#34799](https://github.com/ClickHouse/ClickHouse/issues/34799). [#34982](https://github.com/ClickHouse/ClickHouse/pull/34982) ([WenYao](https://github.com/Cai-Yao)). +* Changed the Play UI to select a theme by the following priority: * 'theme' GET parameter * 'theme' in localStorage * According to OS preference (didn't work before). [#35068](https://github.com/ClickHouse/ClickHouse/pull/35068) ([peledni](https://github.com/peledni)). +* ``` sql ) explain ast graph = 1 select * from system.parts;. [#35173](https://github.com/ClickHouse/ClickHouse/pull/35173) ([李扬](https://github.com/taiyang-li)). +* Add `database_replicated_allow_only_replicated_engine` setting. When enabled, it is only allowed to create `Replicated` tables in `Replicated` database. [#35214](https://github.com/ClickHouse/ClickHouse/pull/35214) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Performance Improvement +* Calling std::distance on large list will decrease performance, use a version in Node to substitute with more memory 80MB/1000w Nodes. The logic is: Every node in the list has a version with type size_t, set in the insert or insertOrReplace method using the class member variable current_version. The version is only increased in the enableSnapshot method. When doing a snapshot, call snapshotSizeWithVersion to get snapshot size and version(snapshot_up_to_version). When traversing the list, if the node version is less than or equal to snapshot_up_to_version, the node is protected from deletion; if the node version is bigger than snapshot_up_to_version, we can do anything to it. [#34486](https://github.com/ClickHouse/ClickHouse/pull/34486) ([zhanglistar](https://github.com/zhanglistar)). +* Compaction of log store in Nuraft need acquire an inner lock which also used in normal commit process, so we delete useless logs in `compact` method of Changelog class in a background thread.
See details on: https://github.com/ClickHouse-Extras/NuRaft/blob/1707a7572aa66ec5d0a2dbe2bf5effa3352e6b2d/src/handle_commit.cxx#L560. [#34534](https://github.com/ClickHouse/ClickHouse/pull/34534) ([zhanglistar](https://github.com/zhanglistar)). +* Don't hold the latest snapshot in memory, instead, reading the snapshot if needed, sequence reading is fast to 200+MBps even on HDD using mmap system call. Writing snapshot data directly to disk using compression method without holding original data and compressed data in memory. [#34584](https://github.com/ClickHouse/ClickHouse/pull/34584) ([zhanglistar](https://github.com/zhanglistar)). +* MergeTree improve insert performance replacing std::stable_sort with pdqsort. [#34750](https://github.com/ClickHouse/ClickHouse/pull/34750) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve the performance of the `ANY` aggregation function by acting over batches. [#34760](https://github.com/ClickHouse/ClickHouse/pull/34760) ([Raúl Marín](https://github.com/Algunenano)). +* Improve performance of `detectCharset `, `detectLanguageUnknown ` functions. Improve performance of `DirectDictionary` if dictionary source is `ClickHouse`. Improve performance of processing queries with large `IN` section. [#34888](https://github.com/ClickHouse/ClickHouse/pull/34888) ([Maksim Kita](https://github.com/kitaisreal)). +* Less lock on connection using atomic stat. Notice that it is an approximate stat. [#35010](https://github.com/ClickHouse/ClickHouse/pull/35010) ([zhanglistar](https://github.com/zhanglistar)). + +#### Improvement +* Make the znode ctime and mtime consistent between servers. [#33441](https://github.com/ClickHouse/ClickHouse/pull/33441) ([小路](https://github.com/nicelulu)). +* Hold time lock while assigning tasks to clear old temporary directories in StorageMergeTree. [#34025](https://github.com/ClickHouse/ClickHouse/pull/34025) ([Amos Bird](https://github.com/amosbird)). 
+* When large files were written with `s3` table function or table engine, the content type on the files was mistakenly set to `application/xml` due to a bug in the AWS SDK. This closes [#33964](https://github.com/ClickHouse/ClickHouse/issues/33964). [#34433](https://github.com/ClickHouse/ClickHouse/pull/34433) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve schema inference with globs in File/S3/HDFS/URL engines. Try to use the next path for schema inference in case of error. [#34465](https://github.com/ClickHouse/ClickHouse/pull/34465) ([Kruglov Pavel](https://github.com/Avogar)). +* - Improve the opentelemetry span logs for INSERT operation on distributed table. [#34480](https://github.com/ClickHouse/ClickHouse/pull/34480) ([Frank Chen](https://github.com/FrankChen021)). +* MaterializedMySQL supports the `materialized_mysql_tables_list` setting (a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. Default value: empty list — means whole tables will be replicated), mentioned at [#32977](https://github.com/ClickHouse/ClickHouse/issues/32977). [#34487](https://github.com/ClickHouse/ClickHouse/pull/34487) ([zzsmdfj](https://github.com/zzsmdfj)). +* This PR changes restrictive row policies a bit to make them an easier alternative to permissive policies in easy cases. If for a particular table only restrictive policies exist (without permissive policies) users will be able to see some rows. Also `SHOW CREATE ROW POLICY` will always show `AS permissive` or `AS restrictive` in row policy's definition. [#34596](https://github.com/ClickHouse/ClickHouse/pull/34596) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add `encodeURLComponent` and `encodeURLFormComponent` functions. Closes [#31092](https://github.com/ClickHouse/ClickHouse/issues/31092). [#34607](https://github.com/ClickHouse/ClickHouse/pull/34607) ([zzsmdfj](https://github.com/zzsmdfj)).
+* Now you can read `system.zookeeper` table without restrictions on path or using `like` expression. This reads can generate quite heavy load for zookeeper so to enable this ability you have to enable setting `allow_unrestricted_reads_from_keeper`. [#34609](https://github.com/ClickHouse/ClickHouse/pull/34609) ([Sergei Trifonov](https://github.com/serxa)). +* Some refactoring and improvement over async and remote buffer related stuff. Separated in each commit. [#34629](https://github.com/ClickHouse/ClickHouse/pull/34629) ([Amos Bird](https://github.com/amosbird)). +* ExecutableUserDefinedFunctions allow to specify argument names. This is necessary for formats where argument name is part of serialization, like `Native`, `JSONEachRow`. Closes [#34604](https://github.com/ClickHouse/ClickHouse/issues/34604). [#34653](https://github.com/ClickHouse/ClickHouse/pull/34653) ([Maksim Kita](https://github.com/kitaisreal)). +* Extract schema only once on table creation and prevent reading from local files/external sources to extract schema on each server startup. [#34684](https://github.com/ClickHouse/ClickHouse/pull/34684) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not reset logging that configured via --log-file/--errorlog-file in case of empty logger.log/logger.errorlog. [#34718](https://github.com/ClickHouse/ClickHouse/pull/34718) ([Amos Bird](https://github.com/amosbird)). +* Support `remote()`/`cluster()` for `parallel_distributed_insert_select=2`. [#34728](https://github.com/ClickHouse/ClickHouse/pull/34728) ([Azat Khuzhin](https://github.com/azat)). +* Add name hints for data skipping indices. Closes [#29698](https://github.com/ClickHouse/ClickHouse/issues/29698). [#34764](https://github.com/ClickHouse/ClickHouse/pull/34764) ([flynn](https://github.com/ucasfl)). +* Now `ALTER TABLE DROP COLUMN columnX` queries for `MergeTree` table engines will work instantly when `columnX` is `ALIAS` column. 
Fixes [#34660](https://github.com/ClickHouse/ClickHouse/issues/34660). [#34786](https://github.com/ClickHouse/ClickHouse/pull/34786) ([alesapin](https://github.com/alesapin)). +* In previous versions the progress bar in clickhouse-client can jump forward near 50% for no reason. This closes [#34324](https://github.com/ClickHouse/ClickHouse/issues/34324). [#34801](https://github.com/ClickHouse/ClickHouse/pull/34801) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix reading only columns which user asked for. Closes [#34163](https://github.com/ClickHouse/ClickHouse/issues/34163). [#34849](https://github.com/ClickHouse/ClickHouse/pull/34849) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Implement MemoryStatisticsOS for FreeBSD. [#34902](https://github.com/ClickHouse/ClickHouse/pull/34902) ([Alexandre Snarskii](https://github.com/snar)). +* Allow to open empty sqlite db file if it does not exist. Closes [#33367](https://github.com/ClickHouse/ClickHouse/issues/33367). [#34907](https://github.com/ClickHouse/ClickHouse/pull/34907) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow LowCardinality strings for ngrambf_v1/tokenbf_v1 indexes. Closes [#21865](https://github.com/ClickHouse/ClickHouse/issues/21865). [#34911](https://github.com/ClickHouse/ClickHouse/pull/34911) ([Lars Hiller Eidnes](https://github.com/larspars)). +* Ignore per-column `TTL` in `CREATE TABLE AS` if new table engine does not support it (i.e. if the engine is not of `MergeTree` family). [#34938](https://github.com/ClickHouse/ClickHouse/pull/34938) ([Azat Khuzhin](https://github.com/azat)). +* Use connection pool for hive metastore client. [#34940](https://github.com/ClickHouse/ClickHouse/pull/34940) ([lgbo](https://github.com/lgbo-ustc)). +* Currently, if the user changes the settings of the system tables there will be tons of logs and ClickHouse will rename the tables every minute. This fixes [#34929](https://github.com/ClickHouse/ClickHouse/issues/34929). 
[#34949](https://github.com/ClickHouse/ClickHouse/pull/34949) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Remove unnecessary columns for reading parquet/orc files. [#34954](https://github.com/ClickHouse/ClickHouse/pull/34954) ([lgbo](https://github.com/lgbo-ustc)). +* For random access readbuffer in hive, the first time to read the readbuffer would use the original readbuffer instead of local file. When we read a parquet/orc format file, the readbuffer seeks to the end of the file, which will be blocked until the local file finishes downloading, and makes the whole process slow. [#34957](https://github.com/ClickHouse/ClickHouse/pull/34957) ([lgbo](https://github.com/lgbo-ustc)). +* Add more sanity checks for keeper configuration: now mixing of localhost and non-local servers is not allowed, also add checks for same value of internal raft port and keeper client port. [#35004](https://github.com/ClickHouse/ClickHouse/pull/35004) ([alesapin](https://github.com/alesapin)). +* Functions `dictGetHierarchy`, `dictIsIn`, `dictGetChildren`, `dictGetDescendants` support implicit key cast and constant arguments. Closes [#34970](https://github.com/ClickHouse/ClickHouse/issues/34970). [#35027](https://github.com/ClickHouse/ClickHouse/pull/35027) ([Maksim Kita](https://github.com/kitaisreal)). +* Avoid division by zero in Query Profiler if Linux kernel has a bug. Closes [#34787](https://github.com/ClickHouse/ClickHouse/issues/34787). [#35032](https://github.com/ClickHouse/ClickHouse/pull/35032) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid possible `MEMORY_LIMIT_EXCEEDED` during `INSERT` into `Buffer` with `AggregateFunction`. [#35072](https://github.com/ClickHouse/ClickHouse/pull/35072) ([Azat Khuzhin](https://github.com/azat)). +* Support `view()` for `parallel_distributed_insert_select`. [#35132](https://github.com/ClickHouse/ClickHouse/pull/35132) ([Azat Khuzhin](https://github.com/azat)).
+* Add setting to lower column case when reading parquet/ORC file. [#35145](https://github.com/ClickHouse/ClickHouse/pull/35145) ([shuchaome](https://github.com/shuchaome)). +* Do not retry non-retriable errors. Closes [#35161](https://github.com/ClickHouse/ClickHouse/issues/35161). [#35172](https://github.com/ClickHouse/ClickHouse/pull/35172) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added disk_name to system.part_log. [#35178](https://github.com/ClickHouse/ClickHouse/pull/35178) ([Artyom Yurkov](https://github.com/Varinara)). +* Currently, ClickHouse validates the hosts defined for the URL and Remote table functions. This PR extends the RemoteHostFilter to MySQL and PostgreSQL table functions. [#35191](https://github.com/ClickHouse/ClickHouse/pull/35191) ([Heena Bansal](https://github.com/HeenaBansal2009)). +* Sometimes it is not enough for us to distinguish queries hierarchy only by is_initial_query in system.query_log and system.processes. So distributed_depth is needed. [#35207](https://github.com/ClickHouse/ClickHouse/pull/35207) ([李扬](https://github.com/taiyang-li)). +* Support test mode for clickhouse-local. [#35264](https://github.com/ClickHouse/ClickHouse/pull/35264) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Return const for function getMacro if not in distributed query. Closes [#34727](https://github.com/ClickHouse/ClickHouse/issues/34727). [#35289](https://github.com/ClickHouse/ClickHouse/pull/35289) ([李扬](https://github.com/taiyang-li)). +* Reload `remote_url_allow_hosts` after config update. [#35294](https://github.com/ClickHouse/ClickHouse/pull/35294) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Bug Fix +* Ignore obsolete grants in ATTACH GRANT statements. This PR fixes [#34815](https://github.com/ClickHouse/ClickHouse/issues/34815). [#34855](https://github.com/ClickHouse/ClickHouse/pull/34855) ([Vitaly Baranov](https://github.com/vitlibar)).
+* When the inner readbuffer's buffer size is too small, NEED_MORE_INPUT in `HadoopSnappyDecoder` will run multiple times (>=3) for one compressed block. This makes the input data be copied into the wrong place in `HadoopSnappyDecoder::buffer`. [#35116](https://github.com/ClickHouse/ClickHouse/pull/35116) ([lgbo](https://github.com/lgbo-ustc)). + +#### Build/Testing/Packaging Improvement +* Randomize some settings in functional tests. This closes [#32268](https://github.com/ClickHouse/ClickHouse/issues/32268). [#34092](https://github.com/ClickHouse/ClickHouse/pull/34092) ([Kruglov Pavel](https://github.com/Avogar)). +* NA. [#34513](https://github.com/ClickHouse/ClickHouse/pull/34513) ([vzakaznikov](https://github.com/vzakaznikov)). +* Debian package clickhouse-test.deb removed completely. CI uses tests from the repository, and standalone testing via deb package is no longer supported. [#34606](https://github.com/ClickHouse/ClickHouse/pull/34606) ([Ilya Yatsishin](https://github.com/qoega)). +* Set timeout 40 minutes for fast tests. [#34624](https://github.com/ClickHouse/ClickHouse/pull/34624) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Drop PVS test from CI. [#34680](https://github.com/ClickHouse/ClickHouse/pull/34680) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Limit DWARF version for debug info by 4 max, because our internal stack symbolizer cannot parse DWARF version 5. This makes sense if you compile ClickHouse with clang-15. [#34777](https://github.com/ClickHouse/ClickHouse/pull/34777) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve CI scripts arguments. [#34792](https://github.com/ClickHouse/ClickHouse/pull/34792) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use @robot-clickhouse as an author and committer for PRs like https://github.com/ClickHouse/ClickHouse/pull/34685. [#34793](https://github.com/ClickHouse/ClickHouse/pull/34793) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Separate smaller clickhouse-keeper build. [#35031](https://github.com/ClickHouse/ClickHouse/pull/35031) ([alesapin](https://github.com/alesapin)). +* Clion has the following problems "The breakpoint will not currently be hit. No executable code is associated with this line". [#35179](https://github.com/ClickHouse/ClickHouse/pull/35179) ([小路](https://github.com/nicelulu)). +* Add an ability to build stripped binaries with cmake. [#35196](https://github.com/ClickHouse/ClickHouse/pull/35196) ([alesapin](https://github.com/alesapin)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fix distributed subquery max_query_size limitation inconsistency. [#34078](https://github.com/ClickHouse/ClickHouse/pull/34078) ([Chao Ma](https://github.com/godliness)). +* Fix incorrect trivial count result when part movement feature is used [#34089](https://github.com/ClickHouse/ClickHouse/issues/34089). [#34385](https://github.com/ClickHouse/ClickHouse/pull/34385) ([nvartolomei](https://github.com/nvartolomei)). +* Stop selecting parts to mutate when the other replica has already updated the /log for the ReplicatedMergeTree engine. [#34633](https://github.com/ClickHouse/ClickHouse/pull/34633) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Fix `allow_experimental_projection_optimization` with `enable_global_with_statement` (before it may lead to `Stack size too large` error in case of multiple expressions in `WITH` clause, and also it executes scalar subqueries again and again, so now it will be more optimal). [#34650](https://github.com/ClickHouse/ClickHouse/pull/34650) ([Azat Khuzhin](https://github.com/azat)). +* Fix serialization/printing for system queries `RELOAD MODEL`, `RELOAD FUNCTION`, `RESTART DISK` when used `ON CLUSTER`. Closes [#34514](https://github.com/ClickHouse/ClickHouse/issues/34514). [#34696](https://github.com/ClickHouse/ClickHouse/pull/34696) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix ENOENT with fsync_part_directory and Vertical merge. [#34739](https://github.com/ClickHouse/ClickHouse/pull/34739) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug for h3 funcs containing const columns which cause queries to fail. [#34743](https://github.com/ClickHouse/ClickHouse/pull/34743) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix possible failures in S2 functions when queries contain const columns. [#34745](https://github.com/ClickHouse/ClickHouse/pull/34745) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix bugs for multiple columns group by in WindowView. [#34859](https://github.com/ClickHouse/ClickHouse/pull/34859) ([vxider](https://github.com/Vxider)). +* Support DDLs like CREATE USER to be executed on cross replicated cluster. [#34860](https://github.com/ClickHouse/ClickHouse/pull/34860) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Fix asynchronous inserts to table functions. Fixes [#34864](https://github.com/ClickHouse/ClickHouse/issues/34864). [#34866](https://github.com/ClickHouse/ClickHouse/pull/34866) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible "Part directory doesn't exist" during `INSERT`. [#34876](https://github.com/ClickHouse/ClickHouse/pull/34876) ([Azat Khuzhin](https://github.com/azat)). +* Fix postgres datetime64 conversion. Closes [#33364](https://github.com/ClickHouse/ClickHouse/issues/33364). [#34910](https://github.com/ClickHouse/ClickHouse/pull/34910) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Avoid busy polling in keeper while searching for changelog files to delete. [#34931](https://github.com/ClickHouse/ClickHouse/pull/34931) ([Azat Khuzhin](https://github.com/azat)). +* Unexpected result when use `in` in `where` in hive query. [#34945](https://github.com/ClickHouse/ClickHouse/pull/34945) ([lgbo](https://github.com/lgbo-ustc)). +* Fix wrong schema inference for unquoted dates in CSV. Closes [#34768](https://github.com/ClickHouse/ClickHouse/issues/34768). 
[#34961](https://github.com/ClickHouse/ClickHouse/pull/34961) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible rare error `Cannot push block to port which already has data`. Avoid pushing to port with data inside `DelayedSource`. [#34993](https://github.com/ClickHouse/ClickHouse/pull/34993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible segfault in filelog. Closes [#30749](https://github.com/ClickHouse/ClickHouse/issues/30749). [#34996](https://github.com/ClickHouse/ClickHouse/pull/34996) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix unexpected result when use -state type aggregate function in window frame. [#34999](https://github.com/ClickHouse/ClickHouse/pull/34999) ([metahys](https://github.com/metahys)). +* Fix possible exception `Reading for MergeTree family tables must be done with last position boundary`. Closes [#34979](https://github.com/ClickHouse/ClickHouse/issues/34979). [#35001](https://github.com/ClickHouse/ClickHouse/pull/35001) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix reading from `system.asynchronous_inserts` table if there exists asynchronous insert into table function. [#35050](https://github.com/ClickHouse/ClickHouse/pull/35050) ([Anton Popov](https://github.com/CurtizJ)). +* Fix missing alias after function is optimized to subcolumn when setting `optimize_functions_to_subcolumns` is enabled. Closes [#33798](https://github.com/ClickHouse/ClickHouse/issues/33798). [#35079](https://github.com/ClickHouse/ClickHouse/pull/35079) ([qieqieplus](https://github.com/qieqieplus)). +* Avoid possible deadlock on server shutdown. [#35081](https://github.com/ClickHouse/ClickHouse/pull/35081) ([Azat Khuzhin](https://github.com/azat)). +* Fixed the "update_lag" external dictionary configuration option being unusable with the error message ``Unexpected key `update_lag` in dictionary source configuration``. 
[#35089](https://github.com/ClickHouse/ClickHouse/pull/35089) ([Jason Chu](https://github.com/1lann)). +* Fix issue [#31469](https://github.com/ClickHouse/ClickHouse/issues/31469). [#35118](https://github.com/ClickHouse/ClickHouse/pull/35118) ([zzsmdfj](https://github.com/zzsmdfj)). +* Fix `optimize_skip_unused_shards_rewrite_in` for signed columns and negative values. [#35134](https://github.com/ClickHouse/ClickHouse/pull/35134) ([Azat Khuzhin](https://github.com/azat)). +* Fixed the incorrect translation of YAML config to XML. [#35135](https://github.com/ClickHouse/ClickHouse/pull/35135) ([Miel Donkers](https://github.com/mdonkers)). +* Fix partition pruning error when non-monotonic function is used with IN operator. This fixes [#35136](https://github.com/ClickHouse/ClickHouse/issues/35136). [#35146](https://github.com/ClickHouse/ClickHouse/pull/35146) ([Amos Bird](https://github.com/amosbird)). +* Fix materialised PostgreSQL adding new table to replication (ATTACH TABLE) after manually removing (DETACH TABLE). Closes [#33800](https://github.com/ClickHouse/ClickHouse/issues/33800). Closes [#34922](https://github.com/ClickHouse/ClickHouse/issues/34922). Closes [#34315](https://github.com/ClickHouse/ClickHouse/issues/34315). [#35158](https://github.com/ClickHouse/ClickHouse/pull/35158) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix materialised postgres `table overrides` for partition by, etc. Closes [#35048](https://github.com/ClickHouse/ClickHouse/issues/35048). [#35162](https://github.com/ClickHouse/ClickHouse/pull/35162) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Schema inference didn't work properly in case of `INSERT INTO FUNCTION s3(...) FROM ...`, it tried to read schema from s3 file instead of from select query. [#35176](https://github.com/ClickHouse/ClickHouse/pull/35176) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `replaceRegexpAll`, closes [#35117](https://github.com/ClickHouse/ClickHouse/issues/35117).
[#35182](https://github.com/ClickHouse/ClickHouse/pull/35182) ([Vladimir C](https://github.com/vdimir)). +* Fix error in query with `WITH TOTALS` in case if `HAVING` returned empty result. This fixes [#33711](https://github.com/ClickHouse/ClickHouse/issues/33711). [#35186](https://github.com/ClickHouse/ClickHouse/pull/35186) ([Amos Bird](https://github.com/amosbird)). +* * Fix reading port from config, close [#34776](https://github.com/ClickHouse/ClickHouse/issues/34776). [#35193](https://github.com/ClickHouse/ClickHouse/pull/35193) ([Vladimir C](https://github.com/vdimir)). +* Make function `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave same as `toIPv4`, `toIPv6` functions. Changed behavior of incorrect IP address passed into functions `toIPv4`,` toIPv6`, now if invalid IP address passes into this functions exception will be raised, before this function return default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringOrNull` `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault `, `toIPv4OrDefault `, `toIPv6OrDefault ` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning default value for invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`, if this setting enabled, then IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)). +* Wait for IDiskRemote thread pool properly. [#35257](https://github.com/ClickHouse/ClickHouse/pull/35257) ([Azat Khuzhin](https://github.com/azat)). +* Fix `CHECK TABLE` query in case when sparse columns are enabled in table. 
[#35274](https://github.com/ClickHouse/ClickHouse/pull/35274) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible Abort while using Brotli compression with a small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35281](https://github.com/ClickHouse/ClickHouse/pull/35281) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible segfault in JSONEachRow schema inference. [#35291](https://github.com/ClickHouse/ClickHouse/pull/35291) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible `Assertion 'position() != working_buffer.end()' failed` while using lzma compression with small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35295](https://github.com/ClickHouse/ClickHouse/pull/35295) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible segfault while using lz4 compression with a small max_read_buffer_size setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35296](https://github.com/ClickHouse/ClickHouse/pull/35296) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible `Assertion 'position() != working_buffer.end()' failed` while using bzip2 compression with small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35300](https://github.com/ClickHouse/ClickHouse/pull/35300) ([Kruglov Pavel](https://github.com/Avogar)). +* - Fix partial merge join duplicate rows bug, close [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). [#35311](https://github.com/ClickHouse/ClickHouse/pull/35311) ([Vladimir C](https://github.com/vdimir)). +* Fix segfault in Postgres database when getting create table query if database was created using named collections. Closes [#35312](https://github.com/ClickHouse/ClickHouse/issues/35312). 
[#35313](https://github.com/ClickHouse/ClickHouse/pull/35313) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix bug in S3 zero-copy replication which can lead to errors like `Found parts with the same min block and with the same max block as the missing part` after concurrent fetch/drop table. [#35348](https://github.com/ClickHouse/ClickHouse/pull/35348) ([alesapin](https://github.com/alesapin)). + +#### NO CL ENTRY + +* NO CL ENTRY: '[ImgBot] Optimize images'. [#34590](https://github.com/ClickHouse/ClickHouse/pull/34590) ([imgbot[bot]](https://github.com/apps/imgbot)). +* NO CL ENTRY: 'Revert "Allow restrictive row policies without permissive"'. [#34782](https://github.com/ClickHouse/ClickHouse/pull/34782) ([Vitaly Baranov](https://github.com/vitlibar)). +* NO CL ENTRY: 'Revert "Remove "bugs" that do not exist anymore"'. [#35241](https://github.com/ClickHouse/ClickHouse/pull/35241) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Change timezone in Docker"'. [#35243](https://github.com/ClickHouse/ClickHouse/pull/35243) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Fix 00900_long_parquet_load"'. [#35301](https://github.com/ClickHouse/ClickHouse/pull/35301) ([Vladimir C](https://github.com/vdimir)). + diff --git a/docs/changelogs/v22.3.2.2-lts.md b/docs/changelogs/v22.3.2.2-lts.md new file mode 100644 index 00000000000..fc37facc7af --- /dev/null +++ b/docs/changelogs/v22.3.2.2-lts.md @@ -0,0 +1,6 @@ +### ClickHouse release v22.3.2.2-lts FIXME as compared to v22.3.1.1262-prestable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fix bug in S3 zero-copy replication which can lead to errors like `Found parts with the same min block and with the same max block as the missing part` after concurrent fetch/drop table. [#35348](https://github.com/ClickHouse/ClickHouse/pull/35348) ([alesapin](https://github.com/alesapin)). 
+ diff --git a/docs/changelogs/v22.3.3.44-lts.md b/docs/changelogs/v22.3.3.44-lts.md new file mode 100644 index 00000000000..4214512c533 --- /dev/null +++ b/docs/changelogs/v22.3.3.44-lts.md @@ -0,0 +1,14 @@ +### ClickHouse release v22.3.3.44-lts FIXME as compared to v22.3.2.2-lts + +#### Bug Fix +* Backported in [#35928](https://github.com/ClickHouse/ClickHouse/issues/35928): Added settings `input_format_ipv4_default_on_conversion_error`, `input_format_ipv6_default_on_conversion_error` to allow insert of invalid ip address values as default into tables. Closes [#35726](https://github.com/ClickHouse/ClickHouse/issues/35726). [#35733](https://github.com/ClickHouse/ClickHouse/pull/35733) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#35415](https://github.com/ClickHouse/ClickHouse/issues/35415): Fix possible deadlock in cache. [#35378](https://github.com/ClickHouse/ClickHouse/pull/35378) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#35563](https://github.com/ClickHouse/ClickHouse/issues/35563): Fix cast into IPv4, IPv6 address in IN section. Fixes [#35528](https://github.com/ClickHouse/ClickHouse/issues/35528). [#35534](https://github.com/ClickHouse/ClickHouse/pull/35534) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#35783](https://github.com/ClickHouse/ClickHouse/issues/35783): Fix bug in conversion from custom types to string that could lead to segfault or unexpected error messages. Closes [#35752](https://github.com/ClickHouse/ClickHouse/issues/35752). [#35755](https://github.com/ClickHouse/ClickHouse/pull/35755) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#35881](https://github.com/ClickHouse/ClickHouse/issues/35881): Fixes parsing of the arguments of the functions `extract`. Fixes [#35751](https://github.com/ClickHouse/ClickHouse/issues/35751). 
[#35799](https://github.com/ClickHouse/ClickHouse/pull/35799) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#35856](https://github.com/ClickHouse/ClickHouse/issues/35856): Respect only quota & period from groups, ignore shares (which are not really limit the number of the cores which can be used). [#35815](https://github.com/ClickHouse/ClickHouse/pull/35815) ([filimonov](https://github.com/filimonov)). +* Backported in [#35938](https://github.com/ClickHouse/ClickHouse/issues/35938): Avoid processing per-column TTL multiple times. [#35820](https://github.com/ClickHouse/ClickHouse/pull/35820) ([Azat Khuzhin](https://github.com/azat)). + diff --git a/docs/changelogs/v22.3.4.20-lts.md b/docs/changelogs/v22.3.4.20-lts.md new file mode 100644 index 00000000000..e746a8e3e0b --- /dev/null +++ b/docs/changelogs/v22.3.4.20-lts.md @@ -0,0 +1,14 @@ +### ClickHouse release v22.3.4.20-lts FIXME as compared to v22.3.3.44-lts + +#### Build/Testing/Packaging Improvement +* - Add `_le_` method for ClickHouseVersion - Fix auto_version for existing tag - docker_server now support getting version from tags - Add python unit tests to backport workflow. [#36028](https://github.com/ClickHouse/ClickHouse/pull/36028) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#36244](https://github.com/ClickHouse/ClickHouse/issues/36244): Fix usage of quota with asynchronous inserts. [#35645](https://github.com/ClickHouse/ClickHouse/pull/35645) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#36240](https://github.com/ClickHouse/ClickHouse/issues/36240): Fix possible loss of subcolumns in type `Object`. [#35682](https://github.com/ClickHouse/ClickHouse/pull/35682) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#36242](https://github.com/ClickHouse/ClickHouse/issues/36242): Fix possible `Can't adjust last granule` exception while reading subcolumns of type `Object`. [#35687](https://github.com/ClickHouse/ClickHouse/pull/35687) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#35938](https://github.com/ClickHouse/ClickHouse/issues/35938): Avoid processing per-column TTL multiple times. [#35820](https://github.com/ClickHouse/ClickHouse/pull/35820) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#36147](https://github.com/ClickHouse/ClickHouse/issues/36147): Fix reading from `Kafka` tables when `kafka_num_consumers > 1` and `kafka_thread_per_consumer = 0`. Returns parallel & multithreaded reading, accidentally broken in 21.11. Closes [#35153](https://github.com/ClickHouse/ClickHouse/issues/35153). [#35973](https://github.com/ClickHouse/ClickHouse/pull/35973) ([filimonov](https://github.com/filimonov)). +* Backported in [#36276](https://github.com/ClickHouse/ClickHouse/issues/36276): Fix reading of empty arrays in reverse order (in queries with descending sorting by prefix of primary key). [#36215](https://github.com/ClickHouse/ClickHouse/pull/36215) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v22.3.5.5-lts.md b/docs/changelogs/v22.3.5.5-lts.md new file mode 100644 index 00000000000..d1c42807f41 --- /dev/null +++ b/docs/changelogs/v22.3.5.5-lts.md @@ -0,0 +1,7 @@ +### ClickHouse release v22.3.5.5-lts FIXME as compared to v22.3.4.20-lts + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#36525](https://github.com/ClickHouse/ClickHouse/issues/36525): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Backported in [#36674](https://github.com/ClickHouse/ClickHouse/issues/36674): Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v22.3.6.5-lts.md b/docs/changelogs/v22.3.6.5-lts.md new file mode 100644 index 00000000000..70a81bbe9ad --- /dev/null +++ b/docs/changelogs/v22.3.6.5-lts.md @@ -0,0 +1,7 @@ +### ClickHouse release v22.3.6.5-lts FIXME as compared to v22.3.5.5-lts + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#36525](https://github.com/ClickHouse/ClickHouse/issues/36525): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#36795](https://github.com/ClickHouse/ClickHouse/issues/36795): Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v22.4.1.2305-prestable.md b/docs/changelogs/v22.4.1.2305-prestable.md new file mode 100644 index 00000000000..ffd11c7d9c4 --- /dev/null +++ b/docs/changelogs/v22.4.1.2305-prestable.md @@ -0,0 +1,239 @@ +### ClickHouse release v22.4.1.2305-prestable FIXME as compared to v22.3.1.1262-prestable + +#### Backward Incompatible Change +* Function `yandexConsistentHash` (consistent hashing algorithm by Konstantin "kostik" Oblakov) is renamed to `kostikConsistentHash`. The old name is left as an alias for compatibility. Although this change is backward compatible, we may remove the alias in subsequent releases, that's why it's recommended to update the usages of this function in your apps. 
[#35553](https://github.com/ClickHouse/ClickHouse/pull/35553) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not allow SETTINGS after FORMAT for INSERT queries (there is compatibility setting `parser_settings_after_format_compact` to accept such queries, but it is turned OFF by default). [#35883](https://github.com/ClickHouse/ClickHouse/pull/35883) ([Azat Khuzhin](https://github.com/azat)). +* Changed hashed path for cache files. [#36079](https://github.com/ClickHouse/ClickHouse/pull/36079) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### New Feature +* Added support for transactions for simple `MergeTree` tables. This feature is highly experimental and not recommended for production. Part of [#22086](https://github.com/ClickHouse/ClickHouse/issues/22086). [#24258](https://github.com/ClickHouse/ClickHouse/pull/24258) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Added load balancing setting for [Zoo]Keeper client. Closes [#29617](https://github.com/ClickHouse/ClickHouse/issues/29617). [#30325](https://github.com/ClickHouse/ClickHouse/pull/30325) ([小路](https://github.com/nicelulu)). +* New aggregation function groupSortedArray to obtain an array of first N values. [#34055](https://github.com/ClickHouse/ClickHouse/pull/34055) ([palegre-tiny](https://github.com/palegre-tiny)). +* New functions minSampleSizeContinous and minSampleSizeConversion. [#34354](https://github.com/ClickHouse/ClickHouse/pull/34354) ([achimbab](https://github.com/achimbab)). +* Profiling on Processors level (under `log_processors_profiles` setting, ClickHouse will write time that processor spent during execution/waiting for data to `system.processors_profile_log` table). [#34355](https://github.com/ClickHouse/ClickHouse/pull/34355) ([Azat Khuzhin](https://github.com/azat)). +* Add `toEndOfMonth` function which rounds up a date or date with time to the last day of the month. [#33501](https://github.com/ClickHouse/ClickHouse/issues/33501). 
[#34394](https://github.com/ClickHouse/ClickHouse/pull/34394) ([Habibullah Oladepo](https://github.com/holadepo)). +* Add `h3PointDistM`, `h3PointDistKm`, `h3PointDistRads`, `h3GetRes0Indexes`, `h3GetPentagonIndexes` functions. [#34568](https://github.com/ClickHouse/ClickHouse/pull/34568) ([Bharat Nallan](https://github.com/bharatnc)). +* Introduce format `ProtobufList`. Fixes [#16436](https://github.com/ClickHouse/ClickHouse/issues/16436). [#35152](https://github.com/ClickHouse/ClickHouse/pull/35152) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* A dedicated small package for `clickhouse-keeper`. [#35308](https://github.com/ClickHouse/ClickHouse/pull/35308) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* added INTERPOLATE extension to the ORDER BY ... WITH FILL closes [#34903](https://github.com/ClickHouse/ClickHouse/issues/34903). [#35349](https://github.com/ClickHouse/ClickHouse/pull/35349) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Added functions `minSampleSizeContinous` and `minSampleSizeConversion`. Author @achimbab. [#35360](https://github.com/ClickHouse/ClickHouse/pull/35360) ([Maksim Kita](https://github.com/kitaisreal)). +* Added functions `arrayFirstOrNull`, `arrayLastOrNull`. Closes [#35238](https://github.com/ClickHouse/ClickHouse/issues/35238). [#35414](https://github.com/ClickHouse/ClickHouse/pull/35414) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow to write remote fs cache on all write operations. Add `system.remote_filesystem_cache` table. Add `drop remote filesystem cache` query. Add introspection for s3 metadata with `system.remote_data_paths` table. Closes [#34021](https://github.com/ClickHouse/ClickHouse/issues/34021). Add cache option for merges by adding mode `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` (turned on by default for merges and can also be turned on by query setting with the same name). 
Rename cache related settings (`remote_fs_enable_cache -> enable_filesystem_cache`, etc). [#35475](https://github.com/ClickHouse/ClickHouse/pull/35475) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added functions makeDate(year, month, day), makeDate32(year, month, day). [#35628](https://github.com/ClickHouse/ClickHouse/pull/35628) ([Alexander Gololobov](https://github.com/davenger)). +* Added function `flattenTuple`. It receives nested named `Tuple` as an argument and returns a flattened `Tuple` whose elements are the paths from the original `Tuple`. E.g.: `Tuple(a Int, Tuple(b Int, c Int)) -> Tuple(a Int, b Int, c Int)`. `flattenTuple` can be used to select all paths from type `Object` as separate columns. [#35690](https://github.com/ClickHouse/ClickHouse/pull/35690) ([Anton Popov](https://github.com/CurtizJ)). +* Support new type of quota `WRITTEN BYTES` to limit amount of written bytes during insert queries. [#35736](https://github.com/ClickHouse/ClickHouse/pull/35736) ([Anton Popov](https://github.com/CurtizJ)). +* Implementation of makeDateTime() and makeDateTime64(). [#35934](https://github.com/ClickHouse/ClickHouse/pull/35934) ([Alexander Gololobov](https://github.com/davenger)). +* Support '\G;' at the end of query for FORMAT Vertical. Closes [#36111](https://github.com/ClickHouse/ClickHouse/issues/36111). [#36130](https://github.com/ClickHouse/ClickHouse/pull/36130) ([yuuch](https://github.com/yuuch)). +* Adding random salt and appending to password to generate password hash. [#36172](https://github.com/ClickHouse/ClickHouse/pull/36172) ([Rajkumar Varada](https://github.com/varadarajkumar)). +* Add setting throw_if_no_data_to_insert. Closes [#36336](https://github.com/ClickHouse/ClickHouse/issues/36336). [#36345](https://github.com/ClickHouse/ClickHouse/pull/36345) ([flynn](https://github.com/ucasfl)). +* Implement type inference for INSERT INTO function null(). Closes [#36334](https://github.com/ClickHouse/ClickHouse/issues/36334).
[#36353](https://github.com/ClickHouse/ClickHouse/pull/36353) ([flynn](https://github.com/ucasfl)). +* ... [#36436](https://github.com/ClickHouse/ClickHouse/pull/36436) ([Rich Raposa](https://github.com/rfraposa)). + +#### Performance Improvement +* Speed up parts loading process of MergeTree to accelerate starting up of clickhouse-server. With this improvement, clickhouse-server was able to decrease starting up time from 75 minutes to 20 seconds, with 700k mergetree parts. [#32928](https://github.com/ClickHouse/ClickHouse/pull/32928) ([李扬](https://github.com/taiyang-li)). +* Sizes of hash tables used during aggregation now collected and used in later queries to avoid hash tables resizes. [#33439](https://github.com/ClickHouse/ClickHouse/pull/33439) ([Nikita Taranov](https://github.com/nickitat)). +* Multiple changes to improve ASOF join performance (1.2 - 1.6x as fast). It also adds support to use big integers. [#34733](https://github.com/ClickHouse/ClickHouse/pull/34733) ([Raúl Marín](https://github.com/Algunenano)). +* URL storage engine now downloads multiple chunks in parallel if the endpoint supports HTTP Range. Two additional settings were added, `max_download_threads` and `max_download_buffer_size`, which control maximum number of threads a single query can use to download the file and the maximum number of bytes each thread can process. [#35150](https://github.com/ClickHouse/ClickHouse/pull/35150) ([Antonio Andelic](https://github.com/antonio2368)). +* parallelization of multipart upload into S3 storage. [#35343](https://github.com/ClickHouse/ClickHouse/pull/35343) ([Sergei Trifonov](https://github.com/serxa)). +* Improve performance of ASOF JOIN if key is native integer. [#35525](https://github.com/ClickHouse/ClickHouse/pull/35525) ([Maksim Kita](https://github.com/kitaisreal)). +* A new query plan optimization. Evaluate functions after `ORDER BY` when possible. 
As an example, for a query `SELECT sipHash64(number) FROM numbers(1e8) ORDER BY number LIMIT 5`, function `sipHash64` would be evaluated after `ORDER BY` and `LIMIT`, which gives ~20x speed up. [#35623](https://github.com/ClickHouse/ClickHouse/pull/35623) ([Nikita Taranov](https://github.com/nickitat)). +* narrow mutex scope when setenv LIBHDFS3_CONF related issue [#35292](https://github.com/ClickHouse/ClickHouse/issues/35292). [#35646](https://github.com/ClickHouse/ClickHouse/pull/35646) ([shuchaome](https://github.com/shuchaome)). +* Improve performance of `hasAll` function using specializations for SSE and AVX2. Author @youennL-cs. [#35723](https://github.com/ClickHouse/ClickHouse/pull/35723) ([Maksim Kita](https://github.com/kitaisreal)). +* - The explain statement of GLOBAL JOIN two distributed tables can speed up 100x: explain plan select ... from t1_dist global join t2_dist on ... explain pipeline select ... from t1_dist global join t2_dist on ... [#36055](https://github.com/ClickHouse/ClickHouse/pull/36055) ([何李夫](https://github.com/helifu)). +* 2 optimizations: - Optimize trivial count hive query - Speed up hive query by caching metadata of hive file. [#36082](https://github.com/ClickHouse/ClickHouse/pull/36082) ([李扬](https://github.com/taiyang-li)). + +#### Improvement +* ... [#21474](https://github.com/ClickHouse/ClickHouse/pull/21474) ([nvartolomei](https://github.com/nvartolomei)). +* As discussed in [issue 27025](https://github.com/ClickHouse/ClickHouse/issues/27025), there is an improvement of the HasAll function using SIMD instructions (SSE and AVX2). Gtest tests have also been added. [#27653](https://github.com/ClickHouse/ClickHouse/pull/27653) ([youennL-cs](https://github.com/youennL-cs)). +* Proper support of setting `max_rows_to_read` in case of reading in order of sorting key and specified limit. Previously the exception `Limit for rows or bytes to read exceeded` could be thrown even if query actually needs to read fewer rows.
[#33230](https://github.com/ClickHouse/ClickHouse/pull/33230) ([Anton Popov](https://github.com/CurtizJ)). +* INTERVAL improvement - can be used with `[MILLI|MICRO|NANO]SECOND`. Added `toStartOf[Milli|Micro|Nano]second()` functions. Added `[add|subtract][Milli|Micro|Nano]second()`. [#34353](https://github.com/ClickHouse/ClickHouse/pull/34353) ([Andrey Zvonov](https://github.com/zvonand)). +* System log tables allow to specify COMMENT in ENGINE declaration. Closes [#33768](https://github.com/ClickHouse/ClickHouse/issues/33768). [#34536](https://github.com/ClickHouse/ClickHouse/pull/34536) ([Maksim Kita](https://github.com/kitaisreal)). +* added sanity checks on server startup (available memory and disk space, max thread count, etc). [#34566](https://github.com/ClickHouse/ClickHouse/pull/34566) ([Sergei Trifonov](https://github.com/serxa)). +* Use minmax index for orc/parquet file in Hive Engine. Related pr: https://github.com/ClickHouse-Extras/arrow/pull/10. [#34631](https://github.com/ClickHouse/ClickHouse/pull/34631) ([李扬](https://github.com/taiyang-li)). +* If `port` is not specified in cluster configuration, default server port will be used. This closes [#34769](https://github.com/ClickHouse/ClickHouse/issues/34769). [#34772](https://github.com/ClickHouse/ClickHouse/pull/34772) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added better error messages in case of connection failed to MySQL. Closes [#35128](https://github.com/ClickHouse/ClickHouse/issues/35128). [#35234](https://github.com/ClickHouse/ClickHouse/pull/35234) ([zzsmdfj](https://github.com/zzsmdfj)). +* Add function `getTypeSerializationStreams`. For a specified type (which is detected from column), it returns an array with all the serialization substream paths. This function is useful mainly for developers. [#35290](https://github.com/ClickHouse/ClickHouse/pull/35290) ([李扬](https://github.com/taiyang-li)). 
+* - wchc operation is expensive and should not be in the four_letter_word_white_list defaults. [#35320](https://github.com/ClickHouse/ClickHouse/pull/35320) ([zhangyuli1](https://github.com/zhangyuli1)). +* Added an ability to specify cluster secret in replicated database. [#35333](https://github.com/ClickHouse/ClickHouse/pull/35333) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add a new kind of row policies named `simple`. Before this PR we had two kinds of row policies: `permissive` and `restrictive`. A `simple` row policy adds a new filter on a table without any side-effects like it was for permissive and restrictive policies. [#35345](https://github.com/ClickHouse/ClickHouse/pull/35345) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove testmode option, enable it unconditionally. [#35354](https://github.com/ClickHouse/ClickHouse/pull/35354) ([Kseniia Sumarokova](https://github.com/kssenii)). +* For table function `s3cluster` or `HDFSCluster` or `hive`, we can't get right `AccessType` by `StorageFactory::instance().getSourceAccessType(getStorageTypeName())`. This PR fixes it. [#35365](https://github.com/ClickHouse/ClickHouse/pull/35365) ([李扬](https://github.com/taiyang-li)). +* For lts releases packages will be pushed to both lts and stable repos. [#35382](https://github.com/ClickHouse/ClickHouse/pull/35382) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Support uuid for postgres engines. Closes [#35384](https://github.com/ClickHouse/ClickHouse/issues/35384). [#35403](https://github.com/ClickHouse/ClickHouse/pull/35403) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add arguments `--user`, `--password`, `--host`, `--port` for clickhouse-diagnostics. [#35422](https://github.com/ClickHouse/ClickHouse/pull/35422) ([李扬](https://github.com/taiyang-li)). +* Fix `INSERT INTO table FROM INFILE` not displaying the progress bar. [#35429](https://github.com/ClickHouse/ClickHouse/pull/35429) ([xiedeyantu](https://github.com/xiedeyantu)).
+* Allow server to bind to low-numbered ports (e.g. 443). ClickHouse installation script will set `cap_net_bind_service` to the binary file. [#35451](https://github.com/ClickHouse/ClickHouse/pull/35451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add settings `input_format_orc_case_insensitive_column_matching`, `input_format_arrow_case_insensitive_column_matching`, and `input_format_parquet_case_insensitive_column_matching` which allows ClickHouse to use case insensitive matching of columns while reading data from ORC, Arrow or Parquet files. [#35459](https://github.com/ClickHouse/ClickHouse/pull/35459) ([Antonio Andelic](https://github.com/antonio2368)). +* - Add explicit table info to the scan node of query plan and pipeline. [#35460](https://github.com/ClickHouse/ClickHouse/pull/35460) ([何李夫](https://github.com/helifu)). +* Propagate query and session settings for distributed DDL queries. Setting `distributed_ddl_entry_format_version` is set to 2 by default now. [#35463](https://github.com/ClickHouse/ClickHouse/pull/35463) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add sizes of subcolumns to `system.parts_columns` table. [#35488](https://github.com/ClickHouse/ClickHouse/pull/35488) ([Anton Popov](https://github.com/CurtizJ)). +* It was possible to get stack overflow in distributed queries if one of the settings `async_socket_for_remote` and `use_hedged_requests` is enabled while parsing very deeply nested data type (at least in debug build). Closes [#35509](https://github.com/ClickHouse/ClickHouse/issues/35509). [#35524](https://github.com/ClickHouse/ClickHouse/pull/35524) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve pasting performance and compatibility of clickhouse-client. This helps [#35501](https://github.com/ClickHouse/ClickHouse/issues/35501). [#35541](https://github.com/ClickHouse/ClickHouse/pull/35541) ([Amos Bird](https://github.com/amosbird)). 
+* Added a support for automatic schema inference to `s3Cluster` table function. Synced the signatures of `s3 ` and `s3Cluster`. [#35544](https://github.com/ClickHouse/ClickHouse/pull/35544) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Use multiple threads to download objects from S3. Downloading is controllable using `max_download_threads` and `max_download_buffer_size` settings. [#35571](https://github.com/ClickHouse/ClickHouse/pull/35571) ([Antonio Andelic](https://github.com/antonio2368)). +* Deduce absolute hdfs config path. [#35572](https://github.com/ClickHouse/ClickHouse/pull/35572) ([李扬](https://github.com/taiyang-li)). +* - Use some tweaks and heuristics to determine numbers, strings, arrays, tuples and maps in CSV, TSV and TSVRaw data formats. Add setting `input_format_csv_use_best_effort_in_schema_inference` for CSV format that enables/disables using these heuristics, if it's disabled, we treat everything as string. Add similar setting `input_format_tsv_use_best_effort_in_schema_inference` for TSV/TSVRaw format. These settings are enabled by default. - Add Maps support for schema inference in Values format. - Fix possible segfault in schema inference in Values format. - Allow to skip columns with unsupported types in Arrow/ORC/Parquet formats. Add corresponding settings for it: `input_format_{parquet|orc|arrow}_skip_columns_with_unsupported_types_in_schema_inference`. These settings are disabled by default. - Allow to convert a column with type Null to a Nullable column with all NULL values in Arrow/Parquet formats. - Allow to specify column names in schema inference via setting `column_names_for_schema_inference` for formats that don't contain column names (like CSV, TSV, JSONCompactEachRow, etc) - Fix schema inference in ORC/Arrow/Parquet formats in terms of working with Nullable columns. 
Previously all inferred types were not Nullable and it blocked reading Nullable columns from data, now it's fixed and all inferred types are always Nullable (because we cannot understand that column is Nullable or not by reading the schema). - Fix schema inference in Template format with CSV escaping rules. [#35582](https://github.com/ClickHouse/ClickHouse/pull/35582) ([Kruglov Pavel](https://github.com/Avogar)). +* Add parallel parsing and schema inference for format `JSONAsObject`. [#35592](https://github.com/ClickHouse/ClickHouse/pull/35592) ([Anton Popov](https://github.com/CurtizJ)). +* Added support for schema inference for `hdfsCluster`. [#35602](https://github.com/ClickHouse/ClickHouse/pull/35602) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* - Improve the pipeline description for JOIN. [#35612](https://github.com/ClickHouse/ClickHouse/pull/35612) ([何李夫](https://github.com/helifu)). +* Support schema inference for type `Object` in format `JSONEachRow`. Allow to convert columns of type `Map` to columns of type `Object`. [#35629](https://github.com/ClickHouse/ClickHouse/pull/35629) ([Anton Popov](https://github.com/CurtizJ)). +* Add profile event counter `AsyncInsertBytes` about size of async INSERTs. [#35644](https://github.com/ClickHouse/ClickHouse/pull/35644) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added `is_secure` column to `system.query_log` which denotes if the client is using a secure connection over TCP or HTTP. [#35705](https://github.com/ClickHouse/ClickHouse/pull/35705) ([Antonio Andelic](https://github.com/antonio2368)). +* closes [#35641](https://github.com/ClickHouse/ClickHouse/issues/35641) Allow EPHEMERAL without explicit default expression. [#35706](https://github.com/ClickHouse/ClickHouse/pull/35706) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix send_logs_level for clickhouse local. Closes [#35653](https://github.com/ClickHouse/ClickHouse/issues/35653). 
[#35716](https://github.com/ClickHouse/ClickHouse/pull/35716) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Improve columns ordering in schema inference for formats TSKV and JSONEachRow, closes [#35640](https://github.com/ClickHouse/ClickHouse/issues/35640). Don't stop schema inference when reading empty row in schema inference for formats TSKV and JSONEachRow. [#35724](https://github.com/ClickHouse/ClickHouse/pull/35724) ([Kruglov Pavel](https://github.com/Avogar)). +* Add new setting `input_format_json_read_bools_as_numbers` that allows to infer and parse bools as numbers in JSON input formats. It's enabled by default. Suggested by @alexey-milovidov. [#35735](https://github.com/ClickHouse/ClickHouse/pull/35735) ([Kruglov Pavel](https://github.com/Avogar)). +* Respect remote_url_allow_hosts for hive. [#35743](https://github.com/ClickHouse/ClickHouse/pull/35743) ([李扬](https://github.com/taiyang-li)). +* Support schema inference for insert select with using `input` table function. Get schema from insertion table instead of inferring it from the data in case of insert select from table functions that support schema inference. Closes [#35639](https://github.com/ClickHouse/ClickHouse/issues/35639). [#35760](https://github.com/ClickHouse/ClickHouse/pull/35760) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve projection analysis to optimize trivial queries such as `count()`. [#35788](https://github.com/ClickHouse/ClickHouse/pull/35788) ([Amos Bird](https://github.com/amosbird)). +* support ALTER TABLE t DETACH PARTITION (ALL). [#35794](https://github.com/ClickHouse/ClickHouse/pull/35794) ([awakeljw](https://github.com/awakeljw)). +* Added an animation to the hourglass icon to indicate to the user that a query is running. [#35860](https://github.com/ClickHouse/ClickHouse/pull/35860) ([peledni](https://github.com/peledni)). +* Now some `ALTER MODIFY COLUMN` queries for `Arrays` and `Nullable` types can be done at metadata level without mutations. 
For example, alter from `Array(Enum8('Option1'=1))` to `Array(Enum8('Option1'=1, 'Option2'=2))`. [#35882](https://github.com/ClickHouse/ClickHouse/pull/35882) ([alesapin](https://github.com/alesapin)). +* Now it's not allowed to `ALTER TABLE ... RESET SETTING` for non-existing settings for MergeTree engines family. Fixes [#35816](https://github.com/ClickHouse/ClickHouse/issues/35816). [#35884](https://github.com/ClickHouse/ClickHouse/pull/35884) ([alesapin](https://github.com/alesapin)). +* Improve settings configuration for s3 storage / table function. [#35915](https://github.com/ClickHouse/ClickHouse/pull/35915) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add some basic metrics to monitor engine=Kafka tables. [#35916](https://github.com/ClickHouse/ClickHouse/pull/35916) ([filimonov](https://github.com/filimonov)). +* Now `kafka_num_consumers` can be bigger than amount of physical cores in case of low resource machine (less than 16 cores). [#35926](https://github.com/ClickHouse/ClickHouse/pull/35926) ([alesapin](https://github.com/alesapin)). +* Update unixodbc to mitigate CVE-2018-7485. [#35943](https://github.com/ClickHouse/ClickHouse/pull/35943) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Require mutations for per-table TTL only when it had been changed. [#35953](https://github.com/ClickHouse/ClickHouse/pull/35953) ([Azat Khuzhin](https://github.com/azat)). +* - Add `dns_max_consecutive_failures` setting to stop re-resolving cached DNS entries after a number of consecutive failures (5 by default). [#35956](https://github.com/ClickHouse/ClickHouse/pull/35956) ([Raúl Marín](https://github.com/Algunenano)). +* ASTPartition::formatImpl should output ALL while executing ALTER TABLE t DETACH PARTITION ALL. [#35987](https://github.com/ClickHouse/ClickHouse/pull/35987) ([awakeljw](https://github.com/awakeljw)). +* `clickhouse-keeper` starts answering 4-letter commands before getting the quorum. 
[#35992](https://github.com/ClickHouse/ClickHouse/pull/35992) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix wrong assertion in replxx which happens when navigating back the history when the first line of input is a newline. Mark as improvement because it only affects debug build. This fixes [#34511](https://github.com/ClickHouse/ClickHouse/issues/34511). [#36007](https://github.com/ClickHouse/ClickHouse/pull/36007) ([Amos Bird](https://github.com/amosbird)). +* If someone writes DEFAULT NULL in table definition, make data type Nullable. [#35887](https://github.com/ClickHouse/ClickHouse/issues/35887). [#36058](https://github.com/ClickHouse/ClickHouse/pull/36058) ([xiedeyantu](https://github.com/xiedeyantu)). +* Added `thread_id` and `query_id` columns to `system.zookeeper_log` table. [#36074](https://github.com/ClickHouse/ClickHouse/pull/36074) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Auto assign numbers for Enum elements. [#36101](https://github.com/ClickHouse/ClickHouse/pull/36101) ([awakeljw](https://github.com/awakeljw)). +* Reset thread name in `ThreadPool` to `ThreadPoolIdle` after job is done. This is to avoid displaying the old thread name for idle threads. This closes [#36114](https://github.com/ClickHouse/ClickHouse/issues/36114). [#36115](https://github.com/ClickHouse/ClickHouse/pull/36115) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support `UNSIGNED` modifier with unused parameters of `INT`. [#36126](https://github.com/ClickHouse/ClickHouse/pull/36126) ([awakeljw](https://github.com/awakeljw)). +* Add support for atomic exchange in OSX. [#36133](https://github.com/ClickHouse/ClickHouse/pull/36133) ([Raúl Marín](https://github.com/Algunenano)). +* Update the progress bar after receiving every ProfileEvents packet. This change must fix the showing of outdated profiling data in client. [#36202](https://github.com/ClickHouse/ClickHouse/pull/36202) ([Dmitry Novik](https://github.com/novikd)). 
+* Check ORC/Parquet/Arrow format magic bytes before loading file in memory to prevent high memory usage in case of wrong file format. [#36209](https://github.com/ClickHouse/ClickHouse/pull/36209) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow queries `insert into function file(...) select from` for files with formats that don't support schema inference. For example: `insert into function file(data.json) select 42` - such query didn't work previously. [#36211](https://github.com/ClickHouse/ClickHouse/pull/36211) ([Kruglov Pavel](https://github.com/Avogar)). +* Send both stdin data and data from query/data from infile in client. Previously client ignored stdin data when both sources were present. Closes [#36100](https://github.com/ClickHouse/ClickHouse/issues/36100). [#36254](https://github.com/ClickHouse/ClickHouse/pull/36254) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow missing columns for mongo storage. Closes [#36119](https://github.com/ClickHouse/ClickHouse/issues/36119). Closes [#26490](https://github.com/ClickHouse/ClickHouse/issues/26490). [#36272](https://github.com/ClickHouse/ClickHouse/pull/36272) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Input format parsers can synchronize after wrong value of `Bool` or `Map` data types (see the `input_format_allow_errors_*` settings). [#36333](https://github.com/ClickHouse/ClickHouse/pull/36333) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check for harmful environment variables like `LD_PRELOAD` at startup. It makes sense in [Google Colab](https://colab.research.google.com/drive/1wzYn59PA9EDyra6356a8rUpwd3zUX0Zt?usp=sharing). This closes [#36340](https://github.com/ClickHouse/ClickHouse/issues/36340). [#36342](https://github.com/ClickHouse/ClickHouse/pull/36342) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix [#36307](https://github.com/ClickHouse/ClickHouse/issues/36307) [#35891](https://github.com/ClickHouse/ClickHouse/issues/35891) possible range issues in automatic assigned enums, also fix error message. [#36352](https://github.com/ClickHouse/ClickHouse/pull/36352) ([awakeljw](https://github.com/awakeljw)). +* hex function support for Int128/Int256/UInt128/UInt256. [#36386](https://github.com/ClickHouse/ClickHouse/pull/36386) ([Memo](https://github.com/Joeywzr)). + +#### Bug Fix +* Add type checking when create materialized view. Try to close: [#23684](https://github.com/ClickHouse/ClickHouse/issues/23684). [#24896](https://github.com/ClickHouse/ClickHouse/pull/24896) ([hexiaoting](https://github.com/hexiaoting)). +* Avoid erasing columns from a block if it doesn't exist while reading data from Hive. [#35393](https://github.com/ClickHouse/ClickHouse/pull/35393) ([lgbo](https://github.com/lgbo-ustc)). +* Added settings `input_format_ipv4_default_on_conversion_error`, `input_format_ipv6_default_on_conversion_error` to allow insert of invalid ip address values as default into tables. Closes [#35726](https://github.com/ClickHouse/ClickHouse/issues/35726). [#35733](https://github.com/ClickHouse/ClickHouse/pull/35733) ([Maksim Kita](https://github.com/kitaisreal)). +* In FileSegmentsHolder::~FileSegmentsHolder(), when a segment is set to detach, it will assert its state is empty. However, in FileSegment::completeImpl(), when detach is set to true, its state may be PARTIALLY_DOWNLOADED_NO_CONTINUATION or SKIP_CACHE or PARTIALLY_DOWNLOADED, thus cause error in FileSegmentsHolder::~FileSegmentsHolder(). ``` if (file_segment->detached) { /// This file segment is not owned by cache, so it will be destructed /// at this point, therefore no completion required. assert(file_segment->state() == FileSegment::State::EMPTY); file_segment_it = file_segments.erase(current_file_segment_it); continue; } ```. 
[#36452](https://github.com/ClickHouse/ClickHouse/pull/36452) ([Han Shukai](https://github.com/KinderRiven)). + +#### Build/Testing/Packaging Improvement +* Add backward compatibility check in stress test. Closes [#25088](https://github.com/ClickHouse/ClickHouse/issues/25088). [#27928](https://github.com/ClickHouse/ClickHouse/pull/27928) ([Kruglov Pavel](https://github.com/Avogar)). +* - Migrate package building to nfpm - Deprecate `release` script in favor of `packages/build` - Build everything in clickhouse/binary-builder image (cleanup: clickhouse/deb-builder) - Add symbol stripping to cmake (todo: use $prefix/lib/$bin_dir/clickhouse/$binary.debug) - Fix issue with DWARF symbols - Add Alpine APK packages - Rename `alien` to `additional_pkgs`. [#33664](https://github.com/ClickHouse/ClickHouse/pull/33664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a night scan and upload for coverity. [#34895](https://github.com/ClickHouse/ClickHouse/pull/34895) ([Boris Kuschel](https://github.com/bkuschel)). +* - Switch to libcxx / libcxxabi from LLVM 14. [#34906](https://github.com/ClickHouse/ClickHouse/pull/34906) ([Raúl Marín](https://github.com/Algunenano)). +* Add next batch of random settings in functional tests. [#35047](https://github.com/ClickHouse/ClickHouse/pull/35047) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix stress-test report in CI, now we upload the runlog with information about started stress tests only once. [#35093](https://github.com/ClickHouse/ClickHouse/pull/35093) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - [x] redo alpine image to use clean Dockerfile - [x] Create a script in tests/ci to build both ubuntu and alpine images - [x] Add clickhouse-keeper image (cc @nikitamikhaylov) - [x] Add build check to PullRequestCI - [x] Add a job to a ReleaseCI - [x] Add a job to MasterCI to build and push `clickhouse/clickhouse-server:head` and `clickhouse/clickhouse-keeper:head` images for each merged PR. 
[#35211](https://github.com/ClickHouse/ClickHouse/pull/35211) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Reverse `--no-prestable` key to match the logic. [#35372](https://github.com/ClickHouse/ClickHouse/pull/35372) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* All disabled tests run longer than 30 seconds. [#35413](https://github.com/ClickHouse/ClickHouse/pull/35413) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix copypaste error for clickhouse-keeper test. [#35428](https://github.com/ClickHouse/ClickHouse/pull/35428) ([zhangyuli1](https://github.com/zhangyuli1)). +* Fix failed tests in: https://s3.amazonaws.com/clickhouse-test-reports/35422/32348779fd0bac5276727cfc01e75c625ecc69b9/fuzzer_astfuzzerubsan,actions//report.html. [#35439](https://github.com/ClickHouse/ClickHouse/pull/35439) ([李扬](https://github.com/taiyang-li)). +* Apply black formatter to python code and add a per-commit check. [#35466](https://github.com/ClickHouse/ClickHouse/pull/35466) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a label to recognize a building task for every image. [#35583](https://github.com/ClickHouse/ClickHouse/pull/35583) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Clean-up after functional test 02167 ... [#35681](https://github.com/ClickHouse/ClickHouse/pull/35681) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Minor improvement in contrib/krb5 build configuration. [#35832](https://github.com/ClickHouse/ClickHouse/pull/35832) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Update URL in test visualizer from `play-ci` to `play` (it was moved). [#35872](https://github.com/ClickHouse/ClickHouse/pull/35872) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Highlight headers in the PR template. Improve description checking logging. [#35947](https://github.com/ClickHouse/ClickHouse/pull/35947) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Running with podman is failing: it complains about specifying the same volume twice. [#35978](https://github.com/ClickHouse/ClickHouse/pull/35978) ([Roman Nikonov](https://github.com/nic11)). +* Add argument for total number of desired builds. [#35999](https://github.com/ClickHouse/ClickHouse/pull/35999) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* - Add `_le_` method for ClickHouseVersion - Fix auto_version for an existing tag - docker_server now supports getting the version from tags - Add python unit tests to backport workflow - Move version_arg to version_helper, add tests. [#36029](https://github.com/ClickHouse/ClickHouse/pull/36029) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Significant improvement in docker build-cache system. [#36041](https://github.com/ClickHouse/ClickHouse/pull/36041) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* After CVE-2022-24765 git needs additional config parameter when directory is owned by another user. [#36193](https://github.com/ClickHouse/ClickHouse/pull/36193) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add an option for build profiling (`-ftime-trace`). [#36318](https://github.com/ClickHouse/ClickHouse/pull/36318) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Delete old packaging infrastructure. [#36330](https://github.com/ClickHouse/ClickHouse/pull/36330) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fallback to a default event in case of broken API. [#36412](https://github.com/ClickHouse/ClickHouse/pull/36412) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Check a number of required reports in BuilderSpecialReport. [#36413](https://github.com/ClickHouse/ClickHouse/pull/36413) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a labeling for `Revert` PRs. [#36422](https://github.com/ClickHouse/ClickHouse/pull/36422) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Disallow ALTER TTL for engines that do not support it, to avoid breaking ATTACH TABLE (closes [#33344](https://github.com/ClickHouse/ClickHouse/issues/33344)). [#33391](https://github.com/ClickHouse/ClickHouse/pull/33391) ([zhongyuankai](https://github.com/zhongyuankai)). +* Do not delay final part writing by default (fixes possible `Memory limit exceeded` during `INSERT` by adding `max_insert_delayed_streams_for_parallel_write` with default to 1000 for writes to s3 and disabled as before otherwise). [#34780](https://github.com/ClickHouse/ClickHouse/pull/34780) ([Azat Khuzhin](https://github.com/azat)). +* Fix issue: `input_format_null_as_default` does not work for DEFAULT expressions. Closes [#34890](https://github.com/ClickHouse/ClickHouse/issues/34890). [#35039](https://github.com/ClickHouse/ClickHouse/pull/35039) ([zzsmdfj](https://github.com/zzsmdfj)). +* Fix mutations in tables with enabled sparse columns. [#35284](https://github.com/ClickHouse/ClickHouse/pull/35284) ([Anton Popov](https://github.com/CurtizJ)). +* Fix schema inference for TSKV format while using small max_read_buffer_size. [#35332](https://github.com/ClickHouse/ClickHouse/pull/35332) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix partition pruning in case of comparison with constant in `WHERE`. If column and constant had different types, overflow was possible. Query could return an incorrect empty result. This fixes [#35304](https://github.com/ClickHouse/ClickHouse/issues/35304). [#35334](https://github.com/ClickHouse/ClickHouse/pull/35334) ([Amos Bird](https://github.com/amosbird)). +* Fix bug in S3 zero-copy replication which can lead to errors like `Found parts with the same min block and with the same max block as the missing part` after concurrent fetch/drop table. [#35348](https://github.com/ClickHouse/ClickHouse/pull/35348) ([alesapin](https://github.com/alesapin)).
+* Fix issue with non-existing directory https://github.com/ClickHouse/ClickHouse/runs/5588046879?check_suite_focus=true. [#35376](https://github.com/ClickHouse/ClickHouse/pull/35376) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix possible deadlock in cache. [#35378](https://github.com/ClickHouse/ClickHouse/pull/35378) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix wrong assets path in release workflow. [#35379](https://github.com/ClickHouse/ClickHouse/pull/35379) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Cache fixes for high concurrency on corner cases. [#35381](https://github.com/ClickHouse/ClickHouse/pull/35381) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix working with columns that are not needed in query in Arrow/Parquet/ORC formats, it prevents possible errors like `Unsupported type of an input column ` when file contains column with unsupported type and we don't use it in query. [#35406](https://github.com/ClickHouse/ClickHouse/pull/35406) ([Kruglov Pavel](https://github.com/Avogar)). +* Skip empty chunks in GroupingAggregatedTransform. [#35417](https://github.com/ClickHouse/ClickHouse/pull/35417) ([Nikita Taranov](https://github.com/nickitat)). +* Now merges executed with zero copy replication will not spam logs with message `Found parts with the same min block and with the same max block as the missing part _ on replica _. Hoping that it will eventually appear as a result of a merge.`. [#35430](https://github.com/ClickHouse/ClickHouse/pull/35430) ([alesapin](https://github.com/alesapin)). +* Fix excessive logging when using S3 as backend for MergeTree or as separate table engine/function. Fixes [#30559](https://github.com/ClickHouse/ClickHouse/issues/30559). [#35434](https://github.com/ClickHouse/ClickHouse/pull/35434) ([alesapin](https://github.com/alesapin)). +* Fix wrong result of datetime64 when negative. Close [#34831](https://github.com/ClickHouse/ClickHouse/issues/34831). 
[#35440](https://github.com/ClickHouse/ClickHouse/pull/35440) ([李扬](https://github.com/taiyang-li)). +* Fix bug in function `if` when resulting column type differs with resulting data type that led to logical errors like `Logical error: 'Bad cast from type DB::ColumnVector to DB::ColumnVector'.`. Closes [#35367](https://github.com/ClickHouse/ClickHouse/issues/35367). [#35476](https://github.com/ClickHouse/ClickHouse/pull/35476) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bug in Keeper which can lead to unstable client connections. Introduced in [#35031](https://github.com/ClickHouse/ClickHouse/issues/35031). [#35498](https://github.com/ClickHouse/ClickHouse/pull/35498) ([alesapin](https://github.com/alesapin)). +* Fix crash for function `throwIf` with constant arguments. [#35500](https://github.com/ClickHouse/ClickHouse/pull/35500) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix crash during short circuit function evaluation when one of arguments is nullable constant. Closes [#35497](https://github.com/ClickHouse/ClickHouse/issues/35497). Closes [#35496](https://github.com/ClickHouse/ClickHouse/issues/35496). [#35502](https://github.com/ClickHouse/ClickHouse/pull/35502) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix cast into IPv4, IPv6 address in IN section. Fixes [#35528](https://github.com/ClickHouse/ClickHouse/issues/35528). [#35534](https://github.com/ClickHouse/ClickHouse/pull/35534) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix parsing of IPv6 addresses longer than 39 characters. Closes [#34022](https://github.com/ClickHouse/ClickHouse/issues/34022). [#35539](https://github.com/ClickHouse/ClickHouse/pull/35539) ([Maksim Kita](https://github.com/kitaisreal)). +* Fixed return type deduction for `caseWithExpression`. The type of the ELSE branch is now correctly taken into account. [#35576](https://github.com/ClickHouse/ClickHouse/pull/35576) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Fix s3 engine getting virtual columns. Closes [#35411](https://github.com/ClickHouse/ClickHouse/issues/35411). [#35586](https://github.com/ClickHouse/ClickHouse/pull/35586) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix version string setting in version_helper.py. [#35589](https://github.com/ClickHouse/ClickHouse/pull/35589) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix headers with named collections, add compression_method. Closes [#35273](https://github.com/ClickHouse/ClickHouse/issues/35273). Closes [#35269](https://github.com/ClickHouse/ClickHouse/issues/35269). [#35593](https://github.com/ClickHouse/ClickHouse/pull/35593) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Setting `database_atomic_wait_for_drop_and_detach_synchronously` worked incorrectly for `ATTACH TABLE` query when previously detached table is still in use, It's fixed. [#35594](https://github.com/ClickHouse/ClickHouse/pull/35594) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix possible segfault in materialised postgresql which happened if exception occurred when data, collected in memory, was synced into underlying tables. Closes [#35611](https://github.com/ClickHouse/ClickHouse/issues/35611). [#35614](https://github.com/ClickHouse/ClickHouse/pull/35614) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix `HashJoin` when columns with `LowCardinality` type are used. This closes [#35548](https://github.com/ClickHouse/ClickHouse/issues/35548). [#35616](https://github.com/ClickHouse/ClickHouse/pull/35616) ([Antonio Andelic](https://github.com/antonio2368)). +* Check remote_url_allow_hosts before schema inference in URL engine Closes [#35064](https://github.com/ClickHouse/ClickHouse/issues/35064). [#35619](https://github.com/ClickHouse/ClickHouse/pull/35619) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix positional arguments with aliases. Closes [#35600](https://github.com/ClickHouse/ClickHouse/issues/35600). 
[#35620](https://github.com/ClickHouse/ClickHouse/pull/35620) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix projection analysis which might lead to wrong query result when IN subquery is used. This fixes [#35336](https://github.com/ClickHouse/ClickHouse/issues/35336). [#35631](https://github.com/ClickHouse/ClickHouse/pull/35631) ([Amos Bird](https://github.com/amosbird)). +* Fix usage of quota with asynchronous inserts. [#35645](https://github.com/ClickHouse/ClickHouse/pull/35645) ([Anton Popov](https://github.com/CurtizJ)). +* Fix server crash when large number of arguments are passed into `format` function. Please refer to the test file and see how to reproduce the crash. [#35651](https://github.com/ClickHouse/ClickHouse/pull/35651) ([Amos Bird](https://github.com/amosbird)). +* Fix part checking logic for parts with projections. Error happened when projection and main part had different types. This is similar to https://github.com/ClickHouse/ClickHouse/pull/33774 . The bug is addressed by @caoyang10. [#35667](https://github.com/ClickHouse/ClickHouse/pull/35667) ([Amos Bird](https://github.com/amosbird)). +* Fix check asof join key nullability, close [#35565](https://github.com/ClickHouse/ClickHouse/issues/35565). [#35674](https://github.com/ClickHouse/ClickHouse/pull/35674) ([Vladimir C](https://github.com/vdimir)). +* Fix possible loss of subcolumns in type `Object`. [#35682](https://github.com/ClickHouse/ClickHouse/pull/35682) ([Anton Popov](https://github.com/CurtizJ)). +* Enable build with JIT compilation by default. [#35683](https://github.com/ClickHouse/ClickHouse/pull/35683) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix possible `Can't adjust last granule` exception while reading subcolumns of type `Object`. [#35687](https://github.com/ClickHouse/ClickHouse/pull/35687) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bug in creating materialized view with subquery after server restart. 
Materialized view was not getting updated after inserts into underlying table after server restart. Closes [#35511](https://github.com/ClickHouse/ClickHouse/issues/35511). [#35691](https://github.com/ClickHouse/ClickHouse/pull/35691) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix dropping non-empty database in clickhouse local. Closes [#35692](https://github.com/ClickHouse/ClickHouse/issues/35692). [#35711](https://github.com/ClickHouse/ClickHouse/pull/35711) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix any/all(subquery) implementation. Closes [#35489](https://github.com/ClickHouse/ClickHouse/issues/35489). [#35727](https://github.com/ClickHouse/ClickHouse/pull/35727) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix bug in conversion from custom types to string that could lead to segfault or unexpected error messages. Closes [#35752](https://github.com/ClickHouse/ClickHouse/issues/35752). [#35755](https://github.com/ClickHouse/ClickHouse/pull/35755) ([Kruglov Pavel](https://github.com/Avogar)). +* Now metadata for broken parts will be removed from metadata cache (introduced in [#32928](https://github.com/ClickHouse/ClickHouse/issues/32928)) on server start. [#35759](https://github.com/ClickHouse/ClickHouse/pull/35759) ([chen9t](https://github.com/chen9t)). +* fix filebuffer pos in RemoteReadBuffer When RemoteReadBuffer is consumed, its pos will increase, for example in HadoopSnappyReadBuffer::nextImpl. ![image](https://user-images.githubusercontent.com/80669699/160880640-8535a701-63bd-42e9-a2c2-e5f215bcd96b.png). [#35771](https://github.com/ClickHouse/ClickHouse/pull/35771) ([shuchaome](https://github.com/shuchaome)). +* Fixes parsing of the arguments of the functions `extract`. Fixes [#35751](https://github.com/ClickHouse/ClickHouse/issues/35751). [#35799](https://github.com/ClickHouse/ClickHouse/pull/35799) ([Nikolay Degterinsky](https://github.com/evillique)). 
+* Fix bug in indexes of not presented columns in -WithNames formats that led to error `INCORRECT_NUMBER_OF_COLUMNS ` when the number of columns is more than 256. Closes [#35793](https://github.com/ClickHouse/ClickHouse/issues/35793). [#35803](https://github.com/ClickHouse/ClickHouse/pull/35803) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix inserts to columns of type `Object` in case when there is data related to several partitions in insert query. [#35806](https://github.com/ClickHouse/ClickHouse/pull/35806) ([Anton Popov](https://github.com/CurtizJ)). +* Respect only quota & period from groups, ignore shares (which are not really limit the number of the cores which can be used). [#35815](https://github.com/ClickHouse/ClickHouse/pull/35815) ([filimonov](https://github.com/filimonov)). +* Avoid processing per-column TTL multiple times. [#35820](https://github.com/ClickHouse/ClickHouse/pull/35820) ([Azat Khuzhin](https://github.com/azat)). +* fix issue: [#34966](https://github.com/ClickHouse/ClickHouse/issues/34966). [#35840](https://github.com/ClickHouse/ClickHouse/pull/35840) ([zzsmdfj](https://github.com/zzsmdfj)). +* Disable `session_log` because memory safety issue has been found by fuzzing. See [#35714](https://github.com/ClickHouse/ClickHouse/issues/35714). [#35873](https://github.com/ClickHouse/ClickHouse/pull/35873) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix formatting of INSERT INFILE queries (missing quotes). [#35886](https://github.com/ClickHouse/ClickHouse/pull/35886) ([Azat Khuzhin](https://github.com/azat)). +* Fixed GA not reporting events. [#35935](https://github.com/ClickHouse/ClickHouse/pull/35935) ([peledni](https://github.com/peledni)). +* Fix reading from `Kafka` tables when `kafka_num_consumers > 1` and `kafka_thread_per_consumer = 0`. Returns parallel & multithreaded reading, accidentally broken in 21.11. Closes [#35153](https://github.com/ClickHouse/ClickHouse/issues/35153). 
[#35973](https://github.com/ClickHouse/ClickHouse/pull/35973) ([filimonov](https://github.com/filimonov)). +* Fix performance regression of scalar query optimization. [#35986](https://github.com/ClickHouse/ClickHouse/pull/35986) ([Amos Bird](https://github.com/amosbird)). +* Fix error while moving table with `JOIN` engine from `Ordinary` database to `Atomic`, close [#35686](https://github.com/ClickHouse/ClickHouse/issues/35686). [#35995](https://github.com/ClickHouse/ClickHouse/pull/35995) ([Vladimir C](https://github.com/vdimir)). +* Fix error `Empty list of columns in SELECT query` in CROSS JOIN close [#35672](https://github.com/ClickHouse/ClickHouse/issues/35672). [#36033](https://github.com/ClickHouse/ClickHouse/pull/36033) ([Vladimir C](https://github.com/vdimir)). +* Fix possible incorrect result of `WINDOW` functions in queries with `LIMIT` which was caused by wrong limit-push-down query plan optimization. Fixes [#36071](https://github.com/ClickHouse/ClickHouse/issues/36071) and [#23125](https://github.com/ClickHouse/ClickHouse/issues/23125). [#36075](https://github.com/ClickHouse/ClickHouse/pull/36075) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Throw an exception when CH cannot execute a file instead of displaying success and silently failing. [#36088](https://github.com/ClickHouse/ClickHouse/pull/36088) ([Julian Gilyadov](https://github.com/israelg99)). +* Fix window view when is proc time and window kind larger than day, see code comment. [#36109](https://github.com/ClickHouse/ClickHouse/pull/36109) ([flynn](https://github.com/ucasfl)). +* Fix bug of read buffer from hdfs. ReadBufferFromHDFSImpl::offset was misused as offset of working_buffer, but it is file offset. cc @kssenii. [#36153](https://github.com/ClickHouse/ClickHouse/pull/36153) ([李扬](https://github.com/taiyang-li)). +* Fix crash in ParallelReadBuffer. [#36169](https://github.com/ClickHouse/ClickHouse/pull/36169) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Allow to convert empty strings to empty values of type `Objects`. [#36179](https://github.com/ClickHouse/ClickHouse/pull/36179) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible segfault in schema inference for JSON formats. [#36195](https://github.com/ClickHouse/ClickHouse/pull/36195) ([Kruglov Pavel](https://github.com/Avogar)). +* `CREATE TABLE ... AS` might fail with `Replica ... already exists` even if `ReplicatedMergeTree` table was created with default arguments. It's fixed. Now `{uuid}` macro is not unfolded when saving table metadata. Therefore, it's not allowed to move `ReplicatedMergeTree` table from `Atomic` to `Ordinary` database if `zookeeper_path` contains `{uuid}` macro (or table was created with default engine arguments). Fixes [#35577](https://github.com/ClickHouse/ClickHouse/issues/35577). [#36200](https://github.com/ClickHouse/ClickHouse/pull/36200) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix reading of empty arrays in reverse order (in queries with descending sorting by prefix of primary key). [#36215](https://github.com/ClickHouse/ClickHouse/pull/36215) ([Anton Popov](https://github.com/CurtizJ)). +* Play UI was not able to display some resultsets, for example `SELECT * FROM dish`. [#36283](https://github.com/ClickHouse/ClickHouse/pull/36283) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in ReadBufferFromHDFS in debug mode. image. [#36287](https://github.com/ClickHouse/ClickHouse/pull/36287) ([zhanglistar](https://github.com/zhanglistar)). +* Fix "Cannot find column" error for distributed queries with LIMIT BY. [#36454](https://github.com/ClickHouse/ClickHouse/pull/36454) ([Azat Khuzhin](https://github.com/azat)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "[WIP] New row policies"'. [#35454](https://github.com/ClickHouse/ClickHouse/pull/35454) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* NO CL ENTRY: 'remove ATOMIC_FLAG_INIT: depreciated in C++20 and warns in clang-14'. [#35785](https://github.com/ClickHouse/ClickHouse/pull/35785) ([Brendan Cox](https://github.com/justnoise)). +* NO CL ENTRY: 'Revert "Added support for schema inference for `hdfsCluster`"'. [#35802](https://github.com/ClickHouse/ClickHouse/pull/35802) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* NO CL ENTRY: '[Snyk] Security upgrade mkdocs from 1.1.2 to 1.3.0'. [#35864](https://github.com/ClickHouse/ClickHouse/pull/35864) ([Snyk bot](https://github.com/snyk-bot)). +* NO CL ENTRY: 'Revert "Format changes for new docs"'. [#35894](https://github.com/ClickHouse/ClickHouse/pull/35894) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "clang-tidy report issues with Medium priority"'. [#35941](https://github.com/ClickHouse/ClickHouse/pull/35941) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Fix crash in ParallelReadBuffer"'. [#36210](https://github.com/ClickHouse/ClickHouse/pull/36210) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### Bug Fix (prestable release) + +* Call RemoteQueryExecutor with original_query instead of a rewritten query to eliminate the AMBIGUOUS_COLUMN_NAME exception. [#35748](https://github.com/ClickHouse/ClickHouse/pull/35748) ([lgbo](https://github.com/lgbo-ustc)). + diff --git a/docs/changelogs/v22.4.2.1-stable.md b/docs/changelogs/v22.4.2.1-stable.md new file mode 100644 index 00000000000..c40bf8d92c9 --- /dev/null +++ b/docs/changelogs/v22.4.2.1-stable.md @@ -0,0 +1,6 @@ +### ClickHouse release v22.4.2.1-stable FIXME as compared to v22.4.1.2305-prestable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fix projection analysis which might lead to wrong query result when IN subquery is used. This fixes [#35336](https://github.com/ClickHouse/ClickHouse/issues/35336).
[#35631](https://github.com/ClickHouse/ClickHouse/pull/35631) ([Amos Bird](https://github.com/amosbird)). + diff --git a/docs/changelogs/v22.4.3.3-stable.md b/docs/changelogs/v22.4.3.3-stable.md new file mode 100644 index 00000000000..5ab7872f880 --- /dev/null +++ b/docs/changelogs/v22.4.3.3-stable.md @@ -0,0 +1,6 @@ +### ClickHouse release v22.4.3.3-stable FIXME as compared to v22.4.2.1-stable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#36582](https://github.com/ClickHouse/ClickHouse/issues/36582): Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416) . This is for https://github.com/ClickHouse/ClickHouse/pull/36417. [#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)). + diff --git a/docs/changelogs/v22.4.4.7-stable.md b/docs/changelogs/v22.4.4.7-stable.md new file mode 100644 index 00000000000..794082328df --- /dev/null +++ b/docs/changelogs/v22.4.4.7-stable.md @@ -0,0 +1,8 @@ +### ClickHouse release v22.4.4.7-stable FIXME as compared to v22.4.3.3-stable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#36524](https://github.com/ClickHouse/ClickHouse/issues/36524): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#36582](https://github.com/ClickHouse/ClickHouse/issues/36582): Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416) . This is for https://github.com/ClickHouse/ClickHouse/pull/36417. [#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)). 
+* Backported in [#36673](https://github.com/ClickHouse/ClickHouse/issues/36673): Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v22.4.5.9-stable.md b/docs/changelogs/v22.4.5.9-stable.md new file mode 100644 index 00000000000..63dfd117816 --- /dev/null +++ b/docs/changelogs/v22.4.5.9-stable.md @@ -0,0 +1,9 @@ +### ClickHouse release v22.4.5.9-stable FIXME as compared to v22.4.4.7-stable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#36524](https://github.com/ClickHouse/ClickHouse/issues/36524): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#36635](https://github.com/ClickHouse/ClickHouse/issues/36635): Fix `Missing column` exception which could happen while using `INTERPOLATE` with `ENGINE = MergeTree` table. [#36549](https://github.com/ClickHouse/ClickHouse/pull/36549) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#36794](https://github.com/ClickHouse/ClickHouse/issues/36794): Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#36926](https://github.com/ClickHouse/ClickHouse/issues/36926): Fix bug in clickhouse-keeper which can lead to corrupted compressed log files in case of small load and restarts. [#36910](https://github.com/ClickHouse/ClickHouse/pull/36910) ([alesapin](https://github.com/alesapin)). 
+ diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 7779057bb74..4aba0506c2e 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -14,7 +14,7 @@ Each functional test sends one or multiple queries to the running ClickHouse ser Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from CLickHouse and it is available to general public. -Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. +Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. ### Running a Test Locally {#functional-test-locally} @@ -30,7 +30,7 @@ For more options, see `tests/clickhouse-test --help`. You can simply run all tes ### Adding a New Test -To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. 
+To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables. diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 2b49603bd0a..f31a78bc1c4 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -127,22 +127,22 @@ After that downloaded archives should be unpacked and installed with installatio LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \ grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) export LATEST_VERSION -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION-amd64.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION-amd64.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION-amd64.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION-amd64.tgz" -tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-common-static-$LATEST_VERSION-amd64.tgz" sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh" -tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" +tar -xzvf 
"clickhouse-common-static-dbg-$LATEST_VERSION-amd64.tgz" sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh" -tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-server-$LATEST_VERSION-amd64.tgz" sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" sudo /etc/init.d/clickhouse-server start -tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-client-$LATEST_VERSION-amd64.tgz" sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" ``` diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index f21858ccc25..e382bbcddd8 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -9,6 +9,7 @@ ClickHouse can accept and return data in various formats. A format supported for results of a `SELECT`, and to perform `INSERT`s into a file-backed table. The supported formats are: + | Format | Input | Output | |-------------------------------------------------------------------------------------------|-------|--------| | [TabSeparated](#tabseparated) | ✔ | ✔ | @@ -195,7 +196,7 @@ This format is also available under the name `TSVWithNames`. Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row. The first row with names is processed the same way as in `TabSeparatedWithNames` format. If setting [input_format_with_types_use_header](../operations/settings/settings.md#settings-input_format_with_types_use_header) is set to 1, -the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. +the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. This format is also available under the name `TSVWithNamesAndTypes`. 
@@ -790,7 +791,7 @@ The query `SELECT * FROM UserActivity FORMAT JSONEachRow` returns: Unlike the [JSON](#json) format, there is no substitution of invalid UTF-8 sequences. Values are escaped in the same way as for `JSON`. -:::info +:::info Any set of bytes can be output in the strings. Use the `JSONEachRow` format if you are sure that the data in the table can be formatted as JSON without losing any information. ::: @@ -1414,7 +1415,7 @@ SET format_avro_schema_registry_url = 'http://schema-registry'; SELECT * FROM topic1_stream; ``` -:::warning +:::warning Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it’s value after a restart. Also you can use the `format_avro_schema_registry_url` setting of the `Kafka` table engine. ::: @@ -1631,7 +1632,7 @@ When working with the `Regexp` format, you can use the following settings: - Escaped (similarly to [TSV](#tabseparated)) - Quoted (similarly to [Values](#data-format-values)) - Raw (extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](#tabseparatedraw)) - + - `format_regexp_skip_unmatched` — [UInt8](../sql-reference/data-types/int-uint.md). Defines the need to throw an exeption in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`. **Usage** diff --git a/docs/en/interfaces/postgresql.md b/docs/en/interfaces/postgresql.md index a9de9f8a284..a7fd33b38c1 100644 --- a/docs/en/interfaces/postgresql.md +++ b/docs/en/interfaces/postgresql.md @@ -5,7 +5,7 @@ sidebar_label: PostgreSQL Interface # PostgreSQL Interface -ClickHouse supports the PostgreSQL wire protocol, which allows you to use Postgres clients to connect to ClickHouse. In a sense, ClickHouse can pretend to a PostgreSQL instance - allowing you to connect a PostgreSQL client application to ClickHouse that is not already directy supported by ClickHouse (for example, Amazon Redshift). 
+ClickHouse supports the PostgreSQL wire protocol, which allows you to use Postgres clients to connect to ClickHouse. In a sense, ClickHouse can pretend to be a PostgreSQL instance - allowing you to connect a PostgreSQL client application to ClickHouse that is not already directly supported by ClickHouse (for example, Amazon Redshift). To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings#server_configuration_parameters-postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder: @@ -53,5 +53,20 @@ default=> And that's it! You now have a PostgreSQL client connected to ClickHouse, and all commands and queries are executed on ClickHouse. +:::caution +The PostgreSQL protocol currently only supports plain-text passwords. +::: + +## Using SSL + +If you have SSL/TLS configured on your ClickHouse instance, then `postgresql_port` will use the same settings (the port is shared for both secure and insecure clients). + +Each client has its own method for connecting using SSL. The following command demonstrates how to pass in the certificates and key to securely connect `psql` to ClickHouse: + +```bash +psql "port=9005 host=127.0.0.1 user=alice dbname=default sslcert=/path/to/certificate.pem sslkey=/path/to/key.pem sslrootcert=/path/to/rootcert.pem sslmode=verify-ca" +``` + +View the [PostgreSQL docs](https://jdbc.postgresql.org/documentation/head/ssl-client.html) for more details on their SSL settings.
[Original article](https://clickhouse.com/docs/en/interfaces/postgresql) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index e4b1fdd3bbb..bd05f3b4ad2 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -13,7 +13,7 @@ Simhash is a hash function, which returns close hash values for close (similar) [Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. -``` sql +```sql halfMD5(par1, ...) ``` @@ -30,11 +30,11 @@ A [UInt64](../../sql-reference/data-types/int-uint.md) data type hash value. **Example** -``` sql +```sql SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS halfMD5hash, toTypeName(halfMD5hash) AS type; ``` -``` text +```response ┌────────halfMD5hash─┬─type───┐ │ 186182704141653334 │ UInt64 │ └────────────────────┴────────┘ @@ -54,7 +54,7 @@ If you want to get the same result as output by the md5sum utility, use lower(he Produces a 64-bit [SipHash](https://131002.net/siphash/) hash value. -``` sql +```sql sipHash64(par1,...) ``` @@ -77,11 +77,11 @@ A [UInt64](../../sql-reference/data-types/int-uint.md) data type hash value. **Example** -``` sql +```sql SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS SipHash, toTypeName(SipHash) AS type; ``` -``` text +```response ┌──────────────SipHash─┬─type───┐ │ 13726873534472839665 │ UInt64 │ └──────────────────────┴────────┘ @@ -93,7 +93,7 @@ Produces a 128-bit [SipHash](https://131002.net/siphash/) hash value. Differs fr **Syntax** -``` sql +```sql sipHash128(par1,...) 
``` @@ -111,13 +111,13 @@ Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md). Query: -``` sql +```sql SELECT hex(sipHash128('foo', '\x01', 3)); ``` Result: -``` text +```response ┌─hex(sipHash128('foo', '', 3))────┐ │ 9DE516A64A414D4B1B609415E4523F24 │ └──────────────────────────────────┘ @@ -127,7 +127,7 @@ Result: Produces a 64-bit [CityHash](https://github.com/google/cityhash) hash value. -``` sql +```sql cityHash64(par1,...) ``` @@ -145,11 +145,11 @@ A [UInt64](../../sql-reference/data-types/int-uint.md) data type hash value. Call example: -``` sql +```sql SELECT cityHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS CityHash, toTypeName(CityHash) AS type; ``` -``` text +```response ┌─────────────CityHash─┬─type───┐ │ 12072650598913549138 │ UInt64 │ └──────────────────────┴────────┘ @@ -157,7 +157,7 @@ SELECT cityHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0 The following example shows how to compute the checksum of the entire table with accuracy up to the row order: -``` sql +```sql SELECT groupBitXor(cityHash64(*)) FROM table ``` @@ -177,7 +177,7 @@ Calculates SHA-1, SHA-224, SHA-256, SHA-512 hash from a string and returns the r **Syntax** -``` sql +```sql SHA1('s') ... SHA512('s') @@ -203,24 +203,62 @@ Use the [hex](../functions/encoding-functions.md#hex) function to represent the Query: -``` sql +```sql SELECT hex(SHA1('abc')); ``` Result: -``` text +```response ┌─hex(SHA1('abc'))─────────────────────────┐ │ A9993E364706816ABA3E25717850C26C9CD0D89D │ └──────────────────────────────────────────┘ ``` +## BLAKE3 {#blake3} + +Calculates BLAKE3 hash string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md). + +**Syntax** + +```sql +BLAKE3('s') +``` + +This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust library. 
The function is rather fast: it is approximately twice as fast as SHA-2, while generating hashes of the same length as SHA-256. + +**Arguments** + +- s - input string for BLAKE3 hash calculation. [String](../data-types/string.md). + +**Return value** + +- BLAKE3 hash as a byte array with type FixedString(32). + +Type: [FixedString](../data-types/fixedstring.md). + +**Example** + +Use the [hex](../functions/encoding-functions.md#hex) function to represent the result as a hex-encoded string. + +Query: +```sql +SELECT hex(BLAKE3('ABC')) +``` + +Result: +```response +┌─hex(BLAKE3('ABC'))───────────────────────────────────────────────┐ +│ D1717274597CF0289694F75D96D444B992A096F1AFD8E7BBFA6EBB1D360FEDFC │ +└──────────────────────────────────────────────────────────────────┘ +``` + ## URLHash(url\[, N\]) {#urlhashurl-n} A fast, decent-quality non-cryptographic hash function for a string obtained from a URL using some type of normalization. `URLHash(s)` – Calculates a hash from a string without one of the trailing symbols `/`,`?` or `#` at the end, if present. `URLHash(s, N)` – Calculates a hash from a string up to the N level in the URL hierarchy, without one of the trailing symbols `/`,`?` or `#` at the end, if present. -Levels are the same as in URLHierarchy. +Levels are the same as in URLHierarchy. ## farmFingerprint64 {#farmfingerprint64} @@ -228,7 +266,7 @@ Levels are the same as in URLHierarchy. Produces a 64-bit [FarmHash](https://github.com/google/farmhash) or Fingerprint value. `farmFingerprint64` is preferred for a stable and portable value. -``` sql +```sql farmFingerprint64(par1, ...) farmHash64(par1, ...) ``` @@ -245,11 +283,11 @@ A [UInt64](../../sql-reference/data-types/int-uint.md) data type hash value.
**Example** -``` sql +```sql SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS FarmHash, toTypeName(FarmHash) AS type; ``` -``` text +```response ┌─────────────FarmHash─┬─type───┐ │ 17790458267262532859 │ UInt64 │ └──────────────────────┴────────┘ @@ -261,7 +299,7 @@ Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add97 **Syntax** -``` sql +```sql SELECT javaHash('') ``` @@ -273,13 +311,13 @@ A `Int32` data type hash value. Query: -``` sql +```sql SELECT javaHash('Hello, world!'); ``` Result: -``` text +```response ┌─javaHash('Hello, world!')─┐ │ -1880044555 │ └───────────────────────────┘ @@ -291,7 +329,7 @@ Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add97 **Syntax** -``` sql +```sql javaHashUTF16LE(stringUtf16le) ``` @@ -309,13 +347,13 @@ Correct query with UTF-16LE encoded string. Query: -``` sql +```sql SELECT javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le')); ``` Result: -``` text +```response ┌─javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le'))─┐ │ 3556498 │ └──────────────────────────────────────────────────────────────┘ @@ -325,7 +363,7 @@ Result: Calculates `HiveHash` from a string. -``` sql +```sql SELECT hiveHash('') ``` @@ -341,13 +379,13 @@ Type: `hiveHash`. Query: -``` sql +```sql SELECT hiveHash('Hello, world!'); ``` Result: -``` text +```response ┌─hiveHash('Hello, world!')─┐ │ 267439093 │ └───────────────────────────┘ @@ -357,7 +395,7 @@ Result: Produces a 64-bit [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/) hash value. -``` sql +```sql metroHash64(par1, ...) ``` @@ -371,11 +409,11 @@ A [UInt64](../../sql-reference/data-types/int-uint.md) data type hash value. 
**Example** -``` sql +```sql SELECT metroHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS MetroHash, toTypeName(MetroHash) AS type; ``` -``` text +```response ┌────────────MetroHash─┬─type───┐ │ 14235658766382344533 │ UInt64 │ └──────────────────────┴────────┘ @@ -391,7 +429,7 @@ For more information, see the link: [JumpConsistentHash](https://arxiv.org/pdf/1 Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value. -``` sql +```sql murmurHash2_32(par1, ...) murmurHash2_64(par1, ...) ``` @@ -407,11 +445,11 @@ Both functions take a variable number of input parameters. Arguments can be any **Example** -``` sql +```sql SELECT murmurHash2_64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS MurmurHash2, toTypeName(MurmurHash2) AS type; ``` -``` text +```response ┌──────────MurmurHash2─┬─type───┐ │ 11832096901709403633 │ UInt64 │ └──────────────────────┴────────┘ @@ -423,7 +461,7 @@ Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash val **Syntax** -``` sql +```sql gccMurmurHash(par1, ...) ``` @@ -441,7 +479,7 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT gccMurmurHash(1, 2, 3) AS res1, gccMurmurHash(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))) AS res2 @@ -449,7 +487,7 @@ SELECT Result: -``` text +```response ┌─────────────────res1─┬────────────────res2─┐ │ 12384823029245979431 │ 1188926775431157506 │ └──────────────────────┴─────────────────────┘ @@ -459,7 +497,7 @@ Result: Produces a [MurmurHash3](https://github.com/aappleby/smhasher) hash value. -``` sql +```sql murmurHash3_32(par1, ...) murmurHash3_64(par1, ...) ``` @@ -475,11 +513,11 @@ Both functions take a variable number of input parameters. 
Arguments can be any **Example** -``` sql +```sql SELECT murmurHash3_32(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS MurmurHash3, toTypeName(MurmurHash3) AS type; ``` -``` text +```response ┌─MurmurHash3─┬─type───┐ │ 2152717 │ UInt32 │ └─────────────┴────────┘ @@ -491,7 +529,7 @@ Produces a 128-bit [MurmurHash3](https://github.com/aappleby/smhasher) hash valu **Syntax** -``` sql +```sql murmurHash3_128(expr) ``` @@ -509,13 +547,13 @@ Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md). Query: -``` sql +```sql SELECT hex(murmurHash3_128('foo', 'foo', 'foo')); ``` Result: -``` text +```response ┌─hex(murmurHash3_128('foo', 'foo', 'foo'))─┐ │ F8F7AD9B6CD4CF117A71E277E2EC2931 │ └───────────────────────────────────────────┘ @@ -525,7 +563,7 @@ Result: Calculates `xxHash` from a string. It is proposed in two flavors, 32 and 64 bits. -``` sql +```sql SELECT xxHash32('') OR @@ -543,13 +581,13 @@ Type: `xxHash`. Query: -``` sql +```sql SELECT xxHash32('Hello, world!'); ``` Result: -``` text +```response ┌─xxHash32('Hello, world!')─┐ │ 834093149 │ └───────────────────────────┘ @@ -567,7 +605,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql ngramSimHash(string[, ngramsize]) ``` @@ -586,13 +624,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT ngramSimHash('ClickHouse') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 1627567969 │ └────────────┘ @@ -606,7 +644,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql ngramSimHashCaseInsensitive(string[, ngramsize]) ``` @@ -625,13 +663,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). 
Query: -``` sql +```sql SELECT ngramSimHashCaseInsensitive('ClickHouse') AS Hash; ``` Result: -``` text +```response ┌──────Hash─┐ │ 562180645 │ └───────────┘ @@ -645,7 +683,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql ngramSimHashUTF8(string[, ngramsize]) ``` @@ -664,13 +702,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT ngramSimHashUTF8('ClickHouse') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 1628157797 │ └────────────┘ @@ -684,7 +722,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql ngramSimHashCaseInsensitiveUTF8(string[, ngramsize]) ``` @@ -703,13 +741,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT ngramSimHashCaseInsensitiveUTF8('ClickHouse') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 1636742693 │ └────────────┘ @@ -723,7 +761,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql wordShingleSimHash(string[, shinglesize]) ``` @@ -742,13 +780,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT wordShingleSimHash('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 2328277067 │ └────────────┘ @@ -762,7 +800,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql wordShingleSimHashCaseInsensitive(string[, shinglesize]) ``` @@ -781,13 +819,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). 
Query: -``` sql +```sql SELECT wordShingleSimHashCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 2194812424 │ └────────────┘ @@ -801,7 +839,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql wordShingleSimHashUTF8(string[, shinglesize]) ``` @@ -820,13 +858,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: -``` sql +```sql SELECT wordShingleSimHashUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 2328277067 │ └────────────┘ @@ -840,7 +878,7 @@ Can be used for detection of semi-duplicate strings with [bitHammingDistance](.. **Syntax** -``` sql +```sql wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) ``` @@ -859,13 +897,13 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). 
Query: -``` sql +```sql SELECT wordShingleSimHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Result: -``` text +```response ┌───────Hash─┐ │ 2194812424 │ └────────────┘ @@ -879,7 +917,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql ngramMinHash(string[, ngramsize, hashnum]) ``` @@ -899,13 +937,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT ngramMinHash('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (18333312859352735453,9054248444481805918) │ └────────────────────────────────────────────┘ @@ -919,7 +957,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) ``` @@ -939,13 +977,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT ngramMinHashCaseInsensitive('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (2106263556442004574,13203602793651726206) │ └────────────────────────────────────────────┘ @@ -959,7 +997,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql ngramMinHashUTF8(string[, ngramsize, hashnum]) ``` @@ -979,13 +1017,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT ngramMinHashUTF8('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (18333312859352735453,6742163577938632877) │ └────────────────────────────────────────────┘ @@ -999,7 +1037,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( 
**Syntax** -``` sql +```sql ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) ``` @@ -1019,13 +1057,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT ngramMinHashCaseInsensitiveUTF8('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple───────────────────────────────────────┐ │ (12493625717655877135,13203602793651726206) │ └─────────────────────────────────────────────┘ @@ -1037,7 +1075,7 @@ Splits a ASCII string into n-grams of `ngramsize` symbols and returns the n-gram **Syntax** -``` sql +```sql ngramMinHashArg(string[, ngramsize, hashnum]) ``` @@ -1057,13 +1095,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT ngramMinHashArg('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ous','ick','lic','Hou','kHo','use'),('Hou','lic','ick','ous','ckH','Cli')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1075,7 +1113,7 @@ Splits a ASCII string into n-grams of `ngramsize` symbols and returns the n-gram **Syntax** -``` sql +```sql ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) ``` @@ -1095,13 +1133,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT ngramMinHashArgCaseInsensitive('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ous','ick','lic','kHo','use','Cli'),('kHo','lic','ick','ous','ckH','Hou')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1113,7 +1151,7 @@ Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram **Syntax** -``` sql +```sql ngramMinHashArgUTF8(string[, ngramsize, hashnum]) ``` @@ -1133,13 +1171,13 @@ Type: 
[Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT ngramMinHashArgUTF8('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ous','ick','lic','Hou','kHo','use'),('kHo','Hou','lic','ick','ous','ckH')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1151,7 +1189,7 @@ Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram **Syntax** -``` sql +```sql ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) ``` @@ -1171,13 +1209,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT ngramMinHashArgCaseInsensitiveUTF8('ClickHouse') AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ckH','ous','ick','lic','kHo','use'),('kHo','lic','ick','ous','ckH','Hou')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1191,7 +1229,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql wordShingleMinHash(string[, shinglesize, hashnum]) ``` @@ -1211,13 +1249,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT wordShingleMinHash('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (16452112859864147620,5844417301642981317) │ └────────────────────────────────────────────┘ @@ -1231,7 +1269,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) ``` @@ -1251,13 +1289,13 @@ Type: 
[Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT wordShingleMinHashCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────┐ │ (3065874883688416519,1634050779997673240) │ └───────────────────────────────────────────┘ @@ -1271,7 +1309,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql wordShingleMinHashUTF8(string[, shinglesize, hashnum]) ``` @@ -1291,13 +1329,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT wordShingleMinHashUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (16452112859864147620,5844417301642981317) │ └────────────────────────────────────────────┘ @@ -1311,7 +1349,7 @@ Can be used for detection of semi-duplicate strings with [tupleHammingDistance]( **Syntax** -``` sql +```sql wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) ``` @@ -1331,13 +1369,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-refere Query: -``` sql +```sql SELECT wordShingleMinHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────┐ │ (3065874883688416519,1634050779997673240) │ └───────────────────────────────────────────┘ @@ -1349,7 +1387,7 @@ Splits a ASCII string into parts (shingles) of `shinglesize` words each and retu **Syntax** -``` sql +```sql wordShingleMinHashArg(string[, shinglesize, hashnum]) ``` @@ -1369,13 +1407,13 @@ 
Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT wordShingleMinHashArg('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────┐ │ (('OLAP','database','analytical'),('online','oriented','processing')) │ └───────────────────────────────────────────────────────────────────────┘ @@ -1387,7 +1425,7 @@ Splits a ASCII string into parts (shingles) of `shinglesize` words each and retu **Syntax** -``` sql +```sql wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) ``` @@ -1407,13 +1445,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT wordShingleMinHashArgCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────────────────────────────────┐ │ (('queries','database','analytical'),('oriented','processing','DBMS')) │ └────────────────────────────────────────────────────────────────────────┘ @@ -1425,7 +1463,7 @@ Splits a UTF-8 string into parts (shingles) of `shinglesize` words each and retu **Syntax** -``` sql +```sql wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) ``` @@ -1445,13 +1483,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT wordShingleMinHashArgUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Result: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────┐ │ (('OLAP','database','analytical'),('online','oriented','processing')) │ 
└───────────────────────────────────────────────────────────────────────┘ @@ -1463,7 +1501,7 @@ Splits a UTF-8 string into parts (shingles) of `shinglesize` words each and retu **Syntax** -``` sql +```sql wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) ``` @@ -1483,13 +1521,13 @@ Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-referen Query: -``` sql +```sql SELECT wordShingleMinHashArgCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Result: -``` text +```response ┌─Tuple──────────────────────────────────────────────────────────────────┐ │ (('queries','database','analytical'),('oriented','processing','DBMS')) │ └────────────────────────────────────────────────────────────────────────┘ diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index e9a15995a16..e53ea41d606 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -11,7 +11,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | --------| ----------| | ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported | | expressions involving window functions, e.g. `(count(*) over ()) / 2)` | not supported, wrap in a subquery ([feature request](https://github.com/ClickHouse/ClickHouse/issues/19857)) | -| `WINDOW` clause (`select ... from table window w as (partiton by id)`) | supported | +| `WINDOW` clause (`select ... 
from table window w as (partition by id)`) | supported | | `ROWS` frame | supported | | `RANGE` frame | supported, the default | | `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead | diff --git a/docs/ru/sql-reference/functions/hash-functions.md b/docs/ru/sql-reference/functions/hash-functions.md index 12091fcbc3f..0065275519b 100644 --- a/docs/ru/sql-reference/functions/hash-functions.md +++ b/docs/ru/sql-reference/functions/hash-functions.md @@ -13,7 +13,7 @@ Simhash – это хеш-функция, которая для близких [Интерпретирует](../../sql-reference/functions/hash-functions.md#type_conversion_functions-reinterpretAsString) все входные параметры как строки и вычисляет хэш [MD5](https://ru.wikipedia.org/wiki/MD5) для каждой из них. Затем объединяет хэши, берет первые 8 байт хэша результирующей строки и интерпретирует их как значение типа `UInt64` с big-endian порядком байтов. -``` sql +```sql halfMD5(par1, ...) ``` @@ -30,11 +30,11 @@ halfMD5(par1, ...) **Пример** -``` sql +```sql SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS halfMD5hash, toTypeName(halfMD5hash) AS type; ``` -``` text +```response ┌────────halfMD5hash─┬─type───┐ │ 186182704141653334 │ UInt64 │ └────────────────────┴────────┘ @@ -54,7 +54,7 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00') Генерирует 64-х битное значение [SipHash](https://131002.net/siphash/). -``` sql +```sql sipHash64(par1,...) ``` @@ -77,11 +77,11 @@ sipHash64(par1,...) **Пример** -``` sql +```sql SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS SipHash, toTypeName(SipHash) AS type; ``` -``` text +```response ┌──────────────SipHash─┬─type───┐ │ 13726873534472839665 │ UInt64 │ └──────────────────────┴────────┘ @@ -93,7 +93,7 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00 **Синтаксис** -``` sql +```sql sipHash128(par1,...) 
``` @@ -111,13 +111,13 @@ sipHash128(par1,...) Запрос: -``` sql +```sql SELECT hex(sipHash128('foo', '\x01', 3)); ``` Результат: -``` text +```response ┌─hex(sipHash128('foo', '', 3))────┐ │ 9DE516A64A414D4B1B609415E4523F24 │ └──────────────────────────────────┘ @@ -127,7 +127,7 @@ SELECT hex(sipHash128('foo', '\x01', 3)); Генерирует 64-х битное значение [CityHash](https://github.com/google/cityhash). -``` sql +```sql cityHash64(par1,...) ``` @@ -145,11 +145,11 @@ cityHash64(par1,...) Пример вызова: -``` sql +```sql SELECT cityHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS CityHash, toTypeName(CityHash) AS type; ``` -``` text +```response ┌─────────────CityHash─┬─type───┐ │ 12072650598913549138 │ UInt64 │ └──────────────────────┴────────┘ @@ -157,7 +157,7 @@ SELECT cityHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0 А вот так вы можете вычислить чексумму всей таблицы с точностью до порядка строк: -``` sql +```sql SELECT groupBitXor(cityHash64(*)) FROM table ``` @@ -177,7 +177,7 @@ SELECT groupBitXor(cityHash64(*)) FROM table **Синтаксис** -``` sql +```sql SHA1('s') ... SHA512('s') @@ -203,18 +203,56 @@ SHA512('s') Запрос: -``` sql +```sql SELECT hex(SHA1('abc')); ``` Результат: -``` text +```response ┌─hex(SHA1('abc'))─────────────────────────┐ │ A9993E364706816ABA3E25717850C26C9CD0D89D │ └──────────────────────────────────────────┘ ``` +## BLAKE3 {#blake3} + +Вычисляет BLAKE3 хеш строки и возвращает полученный набор байт в виде [FixedString](../data-types/fixedstring.md). + +**Синтаксис** + +```sql +BLAKE3('s') +``` + +Данная криптографическая функция интегрирована в ClickHouse из Rust-библиотеки. Функция работает сравнительно быстро, показывая в 2 раза более быстрые результаты по сравнению с SHA-2, генерируя хеши аналогичной SHA-256 длины. + +**Параметры** + +- s - входная строка для вычисления хеша BLAKE3. [String](../data-types/string.md). 
+ +**Возвращаемое значение** + +- Хеш BLAKE3 в виде массива байтов, имеющего тип FixedString(32). + +Тип: [FixedString](../data-types/fixedstring.md). + +**Пример** + +Используйте функцию [hex](../functions/encoding-functions.md#hex) для представления результата в виде строки с шестнадцатеричной кодировкой. + +Запрос: +```sql +SELECT hex(BLAKE3('ABC')) +``` + +Результат: +```response +┌─hex(BLAKE3('ABC'))───────────────────────────────────────────────┐ +│ D1717274597CF0289694F75D96D444B992A096F1AFD8E7BBFA6EBB1D360FEDFC │ +└──────────────────────────────────────────────────────────────────┘ +``` + ## URLHash(url\[, N\]) {#urlhashurl-n} Быстрая не криптографическая хэш-функция неплохого качества для строки, полученной из URL путём некоторой нормализации. @@ -228,7 +266,7 @@ SELECT hex(SHA1('abc')); Создает 64-битное значение [FarmHash](https://github.com/google/farmhash), независимое от платформы (архитектуры сервера), что важно, если значения сохраняются или используются для разбиения данных на группы. -``` sql +```sql farmFingerprint64(par1, ...) farmHash64(par1, ...) ``` @@ -245,11 +283,11 @@ farmHash64(par1, ...) **Пример** -``` sql +```sql SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS FarmHash, toTypeName(FarmHash) AS type; ``` -``` text +```response ┌─────────────FarmHash─┬─type───┐ │ 17790458267262532859 │ UInt64 │ └──────────────────────┴────────┘ @@ -259,7 +297,7 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0 Вычисляет [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) от строки. `JavaHash` не отличается ни скоростью, ни качеством, поэтому эту функцию следует считать устаревшей. Используйте эту функцию, если вам необходимо получить значение хэша по такому же алгоритму.
-``` sql +```sql SELECT javaHash('') ``` @@ -273,13 +311,13 @@ SELECT javaHash('') Запрос: -``` sql +```sql SELECT javaHash('Hello, world!'); ``` Результат: -``` text +```response ┌─javaHash('Hello, world!')─┐ │ -1880044555 │ └───────────────────────────┘ @@ -291,7 +329,7 @@ SELECT javaHash('Hello, world!'); **Синтаксис** -``` sql +```sql javaHashUTF16LE(stringUtf16le) ``` @@ -311,13 +349,13 @@ javaHashUTF16LE(stringUtf16le) Запрос: -``` sql +```sql SELECT javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le')); ``` Результат: -``` text +```response ┌─javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le'))─┐ │ 3556498 │ └──────────────────────────────────────────────────────────────┘ @@ -327,7 +365,7 @@ SELECT javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le')); Вычисляет `HiveHash` от строки. -``` sql +```sql SELECT hiveHash('') ``` @@ -343,13 +381,13 @@ SELECT hiveHash('') Запрос: -``` sql +```sql SELECT hiveHash('Hello, world!'); ``` Результат: -``` text +```response ┌─hiveHash('Hello, world!')─┐ │ 267439093 │ └───────────────────────────┘ @@ -359,7 +397,7 @@ SELECT hiveHash('Hello, world!'); Генерирует 64-х битное значение [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/). -``` sql +```sql metroHash64(par1, ...) ``` @@ -373,11 +411,11 @@ metroHash64(par1, ...) **Пример** -``` sql +```sql SELECT metroHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS MetroHash, toTypeName(MetroHash) AS type; ``` -``` text +```response ┌────────────MetroHash─┬─type───┐ │ 14235658766382344533 │ UInt64 │ └──────────────────────┴────────┘ @@ -393,7 +431,7 @@ SELECT metroHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00: Генерирует значение [MurmurHash2](https://github.com/aappleby/smhasher). -``` sql +```sql murmurHash2_32(par1, ...) murmurHash2_64(par1, ...) ``` @@ -409,11 +447,11 @@ murmurHash2_64(par1, ...) 
**Пример** -``` sql +```sql SELECT murmurHash2_64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS MurmurHash2, toTypeName(MurmurHash2) AS type; ``` -``` text +```response ┌──────────MurmurHash2─┬─type───┐ │ 11832096901709403633 │ UInt64 │ └──────────────────────┴────────┘ @@ -425,7 +463,7 @@ SELECT murmurHash2_64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23: **Синтаксис** -``` sql +```sql gccMurmurHash(par1, ...); ``` @@ -443,7 +481,7 @@ gccMurmurHash(par1, ...); Запрос: -``` sql +```sql SELECT gccMurmurHash(1, 2, 3) AS res1, gccMurmurHash(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))) AS res2 @@ -451,7 +489,7 @@ SELECT Результат: -``` text +```response ┌─────────────────res1─┬────────────────res2─┐ │ 12384823029245979431 │ 1188926775431157506 │ └──────────────────────┴─────────────────────┘ @@ -461,7 +499,7 @@ SELECT Генерирует значение [MurmurHash3](https://github.com/aappleby/smhasher). -``` sql +```sql murmurHash3_32(par1, ...) murmurHash3_64(par1, ...) ``` @@ -477,11 +515,11 @@ murmurHash3_64(par1, ...) **Пример** -``` sql +```sql SELECT murmurHash3_32(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS MurmurHash3, toTypeName(MurmurHash3) AS type; ``` -``` text +```response ┌─MurmurHash3─┬─type───┐ │ 2152717 │ UInt32 │ └─────────────┴────────┘ @@ -493,7 +531,7 @@ SELECT murmurHash3_32(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23: **Синтаксис** -``` sql +```sql murmurHash3_128(expr) ``` @@ -511,13 +549,13 @@ murmurHash3_128(expr) Запрос: -``` sql +```sql SELECT hex(murmurHash3_128('foo', 'foo', 'foo')); ``` Результат: -``` text +```response ┌─hex(murmurHash3_128('foo', 'foo', 'foo'))─┐ │ F8F7AD9B6CD4CF117A71E277E2EC2931 │ └───────────────────────────────────────────┘ @@ -527,7 +565,7 @@ SELECT hex(murmurHash3_128('foo', 'foo', 'foo')); Вычисляет `xxHash` от строки. Предлагается в двух вариантах: 32 и 64 бита. 
-``` sql +```sql SELECT xxHash32('') OR @@ -545,13 +583,13 @@ SELECT xxHash64('') Запрос: -``` sql +```sql SELECT xxHash32('Hello, world!'); ``` Результат: -``` text +```response ┌─xxHash32('Hello, world!')─┐ │ 834093149 │ └───────────────────────────┘ @@ -569,7 +607,7 @@ SELECT xxHash32('Hello, world!'); **Синтаксис** -``` sql +```sql ngramSimHash(string[, ngramsize]) ``` @@ -588,13 +626,13 @@ ngramSimHash(string[, ngramsize]) Запрос: -``` sql +```sql SELECT ngramSimHash('ClickHouse') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 1627567969 │ └────────────┘ @@ -608,7 +646,7 @@ SELECT ngramSimHash('ClickHouse') AS Hash; **Синтаксис** -``` sql +```sql ngramSimHashCaseInsensitive(string[, ngramsize]) ``` @@ -627,13 +665,13 @@ ngramSimHashCaseInsensitive(string[, ngramsize]) Запрос: -``` sql +```sql SELECT ngramSimHashCaseInsensitive('ClickHouse') AS Hash; ``` Результат: -``` text +```response ┌──────Hash─┐ │ 562180645 │ └───────────┘ @@ -647,7 +685,7 @@ SELECT ngramSimHashCaseInsensitive('ClickHouse') AS Hash; **Синтаксис** -``` sql +```sql ngramSimHashUTF8(string[, ngramsize]) ``` @@ -666,13 +704,13 @@ ngramSimHashUTF8(string[, ngramsize]) Запрос: -``` sql +```sql SELECT ngramSimHashUTF8('ClickHouse') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 1628157797 │ └────────────┘ @@ -686,7 +724,7 @@ SELECT ngramSimHashUTF8('ClickHouse') AS Hash; **Синтаксис** -``` sql +```sql ngramSimHashCaseInsensitiveUTF8(string[, ngramsize]) ``` @@ -705,13 +743,13 @@ ngramSimHashCaseInsensitiveUTF8(string[, ngramsize]) Запрос: -``` sql +```sql SELECT ngramSimHashCaseInsensitiveUTF8('ClickHouse') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 1636742693 │ └────────────┘ @@ -725,7 +763,7 @@ SELECT ngramSimHashCaseInsensitiveUTF8('ClickHouse') AS Hash; **Синтаксис** -``` sql +```sql wordShingleSimHash(string[, shinglesize]) ``` @@ -744,13 +782,13 @@ wordShingleSimHash(string[, shinglesize]) Запрос: -``` sql +```sql SELECT 
wordShingleSimHash('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 2328277067 │ └────────────┘ @@ -764,7 +802,7 @@ SELECT wordShingleSimHash('ClickHouse® is a column-oriented database management **Синтаксис** -``` sql +```sql wordShingleSimHashCaseInsensitive(string[, shinglesize]) ``` @@ -783,13 +821,13 @@ wordShingleSimHashCaseInsensitive(string[, shinglesize]) Запрос: -``` sql +```sql SELECT wordShingleSimHashCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 2194812424 │ └────────────┘ @@ -803,7 +841,7 @@ SELECT wordShingleSimHashCaseInsensitive('ClickHouse® is a column-oriented data **Синтаксис** -``` sql +```sql wordShingleSimHashUTF8(string[, shinglesize]) ``` @@ -822,13 +860,13 @@ wordShingleSimHashUTF8(string[, shinglesize]) Запрос: -``` sql +```sql SELECT wordShingleSimHashUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 2328277067 │ └────────────┘ @@ -842,7 +880,7 @@ SELECT wordShingleSimHashUTF8('ClickHouse® is a column-oriented database manage **Синтаксис** -``` sql +```sql wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) ``` @@ -861,13 +899,13 @@ wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) Запрос: -``` sql +```sql SELECT wordShingleSimHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash; ``` Результат: -``` text +```response ┌───────Hash─┐ │ 2194812424 │ └────────────┘ @@ -881,7 +919,7 @@ SELECT wordShingleSimHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented **Синтаксис** -``` sql +```sql 
ngramMinHash(string[, ngramsize, hashnum]) ``` @@ -901,13 +939,13 @@ ngramMinHash(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHash('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (18333312859352735453,9054248444481805918) │ └────────────────────────────────────────────┘ @@ -921,7 +959,7 @@ SELECT ngramMinHash('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) ``` @@ -941,13 +979,13 @@ ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashCaseInsensitive('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (2106263556442004574,13203602793651726206) │ └────────────────────────────────────────────┘ @@ -960,7 +998,7 @@ SELECT ngramMinHashCaseInsensitive('ClickHouse') AS Tuple; Может быть использована для проверки двух строк на схожесть вместе с функцией [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). Если для двух строк минимальные или максимальные хеши одинаковы, мы считаем, что эти строки совпадают. 
**Синтаксис** -``` sql +```sql ngramMinHashUTF8(string[, ngramsize, hashnum]) ``` @@ -980,13 +1018,13 @@ ngramMinHashUTF8(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashUTF8('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (18333312859352735453,6742163577938632877) │ └────────────────────────────────────────────┘ @@ -1000,7 +1038,7 @@ SELECT ngramMinHashUTF8('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) ``` @@ -1020,13 +1058,13 @@ ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashCaseInsensitiveUTF8('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple───────────────────────────────────────┐ │ (12493625717655877135,13203602793651726206) │ └─────────────────────────────────────────────┘ @@ -1038,7 +1076,7 @@ SELECT ngramMinHashCaseInsensitiveUTF8('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql ngramMinHashArg(string[, ngramsize, hashnum]) ``` @@ -1058,13 +1096,13 @@ ngramMinHashArg(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashArg('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ous','ick','lic','Hou','kHo','use'),('Hou','lic','ick','ous','ckH','Cli')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1076,7 +1114,7 @@ SELECT ngramMinHashArg('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) ``` @@ -1096,13 +1134,13 @@ ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashArgCaseInsensitive('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ 
(('ous','ick','lic','kHo','use','Cli'),('kHo','lic','ick','ous','ckH','Hou')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1114,7 +1152,7 @@ SELECT ngramMinHashArgCaseInsensitive('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql ngramMinHashArgUTF8(string[, ngramsize, hashnum]) ``` @@ -1134,13 +1172,13 @@ ngramMinHashArgUTF8(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashArgUTF8('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ous','ick','lic','Hou','kHo','use'),('kHo','Hou','lic','ick','ous','ckH')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1152,7 +1190,7 @@ SELECT ngramMinHashArgUTF8('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) ``` @@ -1172,13 +1210,13 @@ ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) Запрос: -``` sql +```sql SELECT ngramMinHashArgCaseInsensitiveUTF8('ClickHouse') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────────────┐ │ (('ckH','ous','ick','lic','kHo','use'),('kHo','lic','ick','ous','ckH','Hou')) │ └───────────────────────────────────────────────────────────────────────────────┘ @@ -1192,7 +1230,7 @@ SELECT ngramMinHashArgCaseInsensitiveUTF8('ClickHouse') AS Tuple; **Синтаксис** -``` sql +```sql wordShingleMinHash(string[, shinglesize, hashnum]) ``` @@ -1212,13 +1250,13 @@ wordShingleMinHash(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHash('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (16452112859864147620,5844417301642981317) │ 
└────────────────────────────────────────────┘ @@ -1232,7 +1270,7 @@ SELECT wordShingleMinHash('ClickHouse® is a column-oriented database management **Синтаксис** -``` sql +```sql wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) ``` @@ -1252,13 +1290,13 @@ wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────┐ │ (3065874883688416519,1634050779997673240) │ └───────────────────────────────────────────┘ @@ -1272,7 +1310,7 @@ SELECT wordShingleMinHashCaseInsensitive('ClickHouse® is a column-oriented data **Синтаксис** -``` sql +```sql wordShingleMinHashUTF8(string[, shinglesize, hashnum]) ``` @@ -1292,13 +1330,13 @@ wordShingleMinHashUTF8(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────┐ │ (16452112859864147620,5844417301642981317) │ └────────────────────────────────────────────┘ @@ -1312,7 +1350,7 @@ SELECT wordShingleMinHashUTF8('ClickHouse® is a column-oriented database manage **Синтаксис** -``` sql +```sql wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) ``` @@ -1332,13 +1370,13 @@ wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────┐ │ (3065874883688416519,1634050779997673240) │ 
└───────────────────────────────────────────┘ @@ -1350,7 +1388,7 @@ SELECT wordShingleMinHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented **Синтаксис** -``` sql +```sql wordShingleMinHashArg(string[, shinglesize, hashnum]) ``` @@ -1370,13 +1408,13 @@ wordShingleMinHashArg(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashArg('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────┐ │ (('OLAP','database','analytical'),('online','oriented','processing')) │ └───────────────────────────────────────────────────────────────────────┘ @@ -1388,7 +1426,7 @@ SELECT wordShingleMinHashArg('ClickHouse® is a column-oriented database managem **Синтаксис** -``` sql +```sql wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) ``` @@ -1408,13 +1446,13 @@ wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashArgCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────────────────────────────────┐ │ (('queries','database','analytical'),('oriented','processing','DBMS')) │ └────────────────────────────────────────────────────────────────────────┘ @@ -1426,7 +1464,7 @@ SELECT wordShingleMinHashArgCaseInsensitive('ClickHouse® is a column-oriented d **Синтаксис** -``` sql +```sql wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) ``` @@ -1446,13 +1484,13 @@ wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashArgUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) 
AS Tuple; ``` Результат: -``` text +```response ┌─Tuple─────────────────────────────────────────────────────────────────┐ │ (('OLAP','database','analytical'),('online','oriented','processing')) │ └───────────────────────────────────────────────────────────────────────┘ @@ -1464,7 +1502,7 @@ SELECT wordShingleMinHashArgUTF8('ClickHouse® is a column-oriented database man **Синтаксис** -``` sql +```sql wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) ``` @@ -1484,13 +1522,13 @@ wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) Запрос: -``` sql +```sql SELECT wordShingleMinHashArgCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple; ``` Результат: -``` text +```response ┌─Tuple──────────────────────────────────────────────────────────────────┐ │ (('queries','database','analytical'),('oriented','processing','DBMS')) │ └────────────────────────────────────────────────────────────────────────┘ diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 1e6239a7fa6..74b45d411b0 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -34,6 +34,12 @@ struct ACL int32_t permissions; String scheme; String id; + + bool operator<(const ACL & other) const + { + return std::tuple(permissions, scheme, id) + < std::tuple(other.permissions, other.scheme, other.id); + } }; using ACLs = std::vector; diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index a661435a2eb..1481767add3 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -149,9 +149,12 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr serializeSnapshotMetadata(snapshot.snapshot_meta, out); writeBinary(snapshot.session_id, out); - /// Serialize ACLs MAP - writeBinary(snapshot.acl_map.size(), 
out); - for (const auto & [acl_id, acls] : snapshot.acl_map) + /// Better to sort before serialization, otherwise snapshots can be different on different replicas + std::vector> sorted_acl_map(snapshot.acl_map.begin(), snapshot.acl_map.end()); + std::sort(sorted_acl_map.begin(), sorted_acl_map.end()); + /// Serialize ACLs map + writeBinary(sorted_acl_map.size(), out); + for (const auto & [acl_id, acls] : sorted_acl_map) { writeBinary(acl_id, out); writeBinary(acls.size(), out); @@ -187,10 +190,16 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr ++it; } + /// Session must be saved in a sorted order, + /// otherwise snapshots will be different + std::vector> sorted_session_and_timeout(snapshot.session_and_timeout.begin(), snapshot.session_and_timeout.end()); + std::sort(sorted_session_and_timeout.begin(), sorted_session_and_timeout.end()); + /// Serialize sessions - size_t size = snapshot.session_and_timeout.size(); + size_t size = sorted_session_and_timeout.size(); + writeBinary(size, out); - for (const auto & [session_id, timeout] : snapshot.session_and_timeout) + for (const auto & [session_id, timeout] : sorted_session_and_timeout) { writeBinary(session_id, out); writeBinary(timeout, out); diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index d2545550c4f..cf4d1eaf9f2 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -1709,6 +1710,46 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) EXPECT_EQ(changelog4.next_slot(), 5); } +TEST_P(CoordinationTest, TestStorageSnapshotEqual) +{ + auto params = GetParam(); + ChangelogDirTest test("./snapshots"); + std::optional snapshot_hash; + for (size_t i = 0; i < 15; ++i) + { + DB::KeeperSnapshotManager manager("./snapshots", 3, params.enable_compression); + + DB::KeeperStorage storage(500, ""); + for 
(size_t j = 0; j < 5000; ++j) + { + addNode(storage, "/hello_" + std::to_string(j), "world", 1); + addNode(storage, "/hello/somepath_" + std::to_string(j), "somedata", 3); + } + + storage.session_id_counter = 5; + + storage.ephemerals[3] = {"/hello"}; + storage.ephemerals[1] = {"/hello/somepath"}; + + for (size_t j = 0; j < 3333; ++j) + storage.getSessionID(130 * j); + + DB::KeeperStorageSnapshot snapshot(&storage, storage.zxid); + + auto buf = manager.serializeSnapshotToBuffer(snapshot); + + auto new_hash = sipHash128(reinterpret_cast(buf->data()), buf->size()); + if (!snapshot_hash.has_value()) + { + snapshot_hash = new_hash; + } + else + { + EXPECT_EQ(*snapshot_hash, new_hash); + } + } +} + TEST_P(CoordinationTest, TestLogGap) { diff --git a/src/Core/ColumnNumbers.h b/src/Core/ColumnNumbers.h index 9441f6485a7..29b4c49dc83 100644 --- a/src/Core/ColumnNumbers.h +++ b/src/Core/ColumnNumbers.h @@ -8,5 +8,6 @@ namespace DB { using ColumnNumbers = std::vector; +using ColumnNumbersList = std::vector; } diff --git a/src/Core/NamesAndTypes.h b/src/Core/NamesAndTypes.h index 3ac9ad2fa02..2719017a726 100644 --- a/src/Core/NamesAndTypes.h +++ b/src/Core/NamesAndTypes.h @@ -109,6 +109,8 @@ public: std::optional tryGetByName(const std::string & name) const; }; +using NamesAndTypesLists = std::vector; + } namespace std diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 993e7b759b0..7ac59b35325 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -726,7 +726,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, output_format_pretty_row_numbers, false, "Add row numbers before each row for pretty output format", 0) \ M(Bool, insert_distributed_one_random_shard, false, "If setting is enabled, inserting into distributed table will choose a random shard to write when there is no sharding key", 0) \ \ - M(Bool, cross_to_inner_join_rewrite, true, "Use inner join instead of comma/cross join if possible", 0) \ + M(UInt64, 
cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if possible. Possible values: 0 - no rewrite, 1 - apply if possible, 2 - force rewrite all cross joins", 0) \ \ M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ \ diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index debe7fac8a5..a84e5a3f526 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -13,6 +13,7 @@ add_library(clickhouse_functions ${clickhouse_functions_sources}) target_link_libraries(clickhouse_functions PUBLIC + ch_contrib::wyhash ch_contrib::cityhash ch_contrib::farmhash clickhouse_dictionaries diff --git a/src/Functions/FunctionsHashing.cpp b/src/Functions/FunctionsHashing.cpp index cbafd4bcec2..901234e5443 100644 --- a/src/Functions/FunctionsHashing.cpp +++ b/src/Functions/FunctionsHashing.cpp @@ -39,5 +39,7 @@ void registerFunctionsHashing(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + + factory.registerFunction(); } } diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 750c247b518..c6e66a3d46d 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -5,6 +5,7 @@ #include #include #include +#include #include "config_functions.h" #include "config_core.h" @@ -1369,6 +1370,29 @@ private: } }; +struct ImplWyHash64 +{ + static constexpr auto name = "wyHash64"; + using ReturnType = UInt64; + + static UInt64 apply(const char * s, const size_t len) + { + return wyhash(s, len, 0, _wyp); + } + static UInt64 combineHashes(UInt64 h1, UInt64 h2) + { + union + { + UInt64 u64[2]; + char chars[16]; + }; + u64[0] = h1; + u64[1] = h2; + return apply(chars, 16); + } + + static constexpr bool use_int_hash_for_pods = false; +}; struct NameIntHash32 { static constexpr auto name = "intHash32"; }; struct NameIntHash64 { static constexpr auto name = "intHash64"; 
}; @@ -1406,4 +1430,6 @@ using FunctionHiveHash = FunctionAnyHash; using FunctionXxHash32 = FunctionAnyHash; using FunctionXxHash64 = FunctionAnyHash; +using FunctionWyHash64 = FunctionAnyHash; + } diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 9471b5d319b..342a512ee52 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -151,6 +151,13 @@ void WriteBufferFromS3::allocateBuffer() WriteBufferFromS3::~WriteBufferFromS3() { +#ifndef NDEBUG + if (!is_finalized) + { + LOG_ERROR(log, "WriteBufferFromS3 is not finalized in destructor. It's a bug"); + std::terminate(); + } +#else try { finalize(); @@ -159,6 +166,7 @@ WriteBufferFromS3::~WriteBufferFromS3() { tryLogCurrentException(__PRETTY_FUNCTION__); } +#endif } bool WriteBufferFromS3::cacheEnabled() const @@ -192,6 +200,8 @@ void WriteBufferFromS3::finalizeImpl() if (!multipart_upload_id.empty()) completeMultipartUpload(); + + is_finalized = true; } void WriteBufferFromS3::createMultipartUpload() diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 19faf0b1488..6279e519be0 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -47,7 +47,7 @@ class WriteBufferFromFile; class WriteBufferFromS3 final : public BufferWithOwnMemory { public: - explicit WriteBufferFromS3( + WriteBufferFromS3( std::shared_ptr client_ptr_, const String & bucket_, const String & key_, @@ -105,6 +105,7 @@ private: std::vector part_tags; bool is_prefinalized = false; + bool is_finalized = false; /// Following fields are for background uploads in thread pool (if specified). /// We use std::function to avoid dependency of Interpreters diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 1ff82c8ea60..d11dfc3b8ad 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -214,7 +214,7 @@ public: ActionsDAGPtr clone() const; /// Execute actions for header. Input block must have empty columns. 
- /// Result should be equal to the execution of ExpressionActions build form this DAG. + /// Result should be equal to the execution of ExpressionActions built from this DAG. /// Actions are not changed, no expressions are compiled. /// /// In addition, check that result constants are constants according to DAG. diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index e7cf3c85a15..1806465db4a 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -991,9 +992,13 @@ public: } /// Only parameters that matter during merge. - Params(const Block & intermediate_header_, - const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_threads_) - : Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, max_threads_, 0, false, 0) + Params( + const Block & intermediate_header_, + const ColumnNumbers & keys_, + const AggregateDescriptions & aggregates_, + bool overflow_row_, + size_t max_threads_) + : Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, max_threads_, 0, false, 0, {}, {}) { intermediate_header = intermediate_header_; } diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index ef1888345e6..d817988e7b6 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -286,8 +286,9 @@ Cluster::Address Cluster::Address::fromFullString(const String & full_string) /// Implementation of Clusters class -Clusters::Clusters(const Poco::Util::AbstractConfiguration & config, const Settings & settings, const String & config_prefix) +Clusters::Clusters(const Poco::Util::AbstractConfiguration & config, const Settings & settings, MultiVersion::Version macros, const String & config_prefix) { + this->macros_ = macros; updateClusters(config, settings, config_prefix); } @@ -296,7 +297,8 @@ ClusterPtr 
Clusters::getCluster(const std::string & cluster_name) const { std::lock_guard lock(mutex); - auto it = impl.find(cluster_name); + auto expanded_cluster_name = macros_->expand(cluster_name); + auto it = impl.find(expanded_cluster_name); return (it != impl.end()) ? it->second : nullptr; } diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index df781188d60..7c8d15d0350 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -2,6 +2,8 @@ #include #include +#include +#include #include @@ -290,7 +292,7 @@ using ClusterPtr = std::shared_ptr; class Clusters { public: - Clusters(const Poco::Util::AbstractConfiguration & config, const Settings & settings, const String & config_prefix = "remote_servers"); + Clusters(const Poco::Util::AbstractConfiguration & config, const Settings & settings, MultiVersion::Version macros, const String & config_prefix = "remote_servers"); Clusters(const Clusters &) = delete; Clusters & operator=(const Clusters &) = delete; @@ -309,6 +311,8 @@ protected: /// setup outside of this class, stored to prevent deleting from impl on config update std::unordered_set automatic_clusters; + MultiVersion::Version macros_; + Impl impl; mutable std::mutex mutex; }; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 0ef024f7f47..78371002535 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2334,7 +2334,7 @@ void Context::reloadClusterConfig() const } const auto & config = cluster_config ? *cluster_config : getConfigRef(); - auto new_clusters = std::make_shared(config, settings); + auto new_clusters = std::make_shared(config, settings, getMacros()); { std::lock_guard lock(shared->clusters_mutex); @@ -2356,7 +2356,7 @@ std::shared_ptr Context::getClusters() const if (!shared->clusters) { const auto & config = shared->clusters_config ? 
*shared->clusters_config : getConfigRef(); - shared->clusters = std::make_shared(config, settings); + shared->clusters = std::make_shared(config, settings, getMacros()); } return shared->clusters; @@ -2387,7 +2387,7 @@ void Context::setClustersConfig(const ConfigurationPtr & config, bool enable_dis shared->clusters_config = config; if (!shared->clusters) - shared->clusters = std::make_shared(*shared->clusters_config, settings, config_name); + shared->clusters = std::make_shared(*shared->clusters_config, settings, getMacros(), config_name); else shared->clusters->updateClusters(*shared->clusters_config, settings, config_name, old_clusters_config); } diff --git a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index c986aa9ec5a..d438ea9394e 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -18,11 +18,14 @@ #include #include +#include + namespace DB { namespace ErrorCodes { + extern const int INCORRECT_QUERY; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; } @@ -232,11 +235,26 @@ void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & da auto asts_to_join_on = moveExpressionToJoinOn(select.where(), joined_tables, data.tables_with_columns, data.aliases); for (size_t i = 1; i < joined_tables.size(); ++i) { + auto & joined = joined_tables[i]; + if (joined.tableJoin()->kind != ASTTableJoin::Kind::Cross) + continue; + + String query_before = queryToString(*joined.tableJoin()); + bool rewritten = false; const auto & expr_it = asts_to_join_on.find(i); if (expr_it != asts_to_join_on.end()) { - if (joined_tables[i].rewriteCrossToInner(makeOnExpression(expr_it->second))) - data.done = true; + ASTPtr on_expr = makeOnExpression(expr_it->second); + if (rewritten = joined.rewriteCrossToInner(on_expr); rewritten) + { + LOG_DEBUG(&Poco::Logger::get("CrossToInnerJoin"), "Rewritten '{}' to '{}'", query_before, 
queryToString(*joined.tableJoin())); + } + } + + if (data.cross_to_inner_join_rewrite > 1 && !rewritten) + { + throw Exception(ErrorCodes::INCORRECT_QUERY, "Failed to rewrite '{} WHERE {}' to INNER JOIN", + query_before, queryToString(select.where())); } } } diff --git a/src/Interpreters/CrossToInnerJoinVisitor.h b/src/Interpreters/CrossToInnerJoinVisitor.h index 885cf8162c1..704be42d3c1 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.h +++ b/src/Interpreters/CrossToInnerJoinVisitor.h @@ -18,8 +18,7 @@ public: const std::vector & tables_with_columns; const Aliases & aliases; const String current_database; - bool done = false; - bool cross_to_inner_join_rewrite = true; + UInt8 cross_to_inner_join_rewrite = 1; }; static bool needChildVisit(ASTPtr &, const ASTPtr &); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 0a156ba0b3e..01769742071 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -43,7 +43,9 @@ #include #include +#include +#include #include #include @@ -325,6 +327,12 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) { NameSet unique_keys; ASTs & group_asts = group_by_ast->children; + + /// For GROUPING SETS with multiple groups we always add virtual __grouping_set column + /// With set number, which is used as an additional key at the stage of merging aggregating data. 
+ if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) + aggregated_columns.emplace_back("__grouping_set", std::make_shared()); + for (ssize_t i = 0; i < static_cast(group_asts.size()); ++i) { ssize_t size = group_asts.size(); @@ -332,46 +340,105 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) if (getContext()->getSettingsRef().enable_positional_arguments) replaceForPositionalArguments(group_asts[i], select_query, ASTSelectQuery::Expression::GROUP_BY); - getRootActionsNoMakeSet(group_asts[i], temp_actions, false); - - const auto & column_name = group_asts[i]->getColumnName(); - - const auto * node = temp_actions->tryFindInIndex(column_name); - if (!node) - throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); - - /// Only removes constant keys if it's an initiator or distributed_group_by_no_merge is enabled. - if (getContext()->getClientInfo().distributed_depth == 0 || settings.distributed_group_by_no_merge > 0) + if (select_query->group_by_with_grouping_sets) { - /// Constant expressions have non-null column pointer at this stage. - if (node->column && isColumnConst(*node->column)) + ASTs group_elements_ast; + const ASTExpressionList * group_ast_element = group_asts[i]->as(); + group_elements_ast = group_ast_element->children; + + NamesAndTypesList grouping_set_list; + + for (ssize_t j = 0; j < ssize_t(group_elements_ast.size()); ++j) { - select_query->group_by_with_constant_keys = true; + getRootActionsNoMakeSet(group_elements_ast[j], temp_actions, false); - /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work. 
- if (!aggregate_descriptions.empty() || size > 1) + ssize_t group_size = group_elements_ast.size(); + const auto & column_name = group_elements_ast[j]->getColumnName(); + const auto * node = temp_actions->tryFindInIndex(column_name); + if (!node) + throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); + + /// Only removes constant keys if it's an initiator or distributed_group_by_no_merge is enabled. + if (getContext()->getClientInfo().distributed_depth == 0 || settings.distributed_group_by_no_merge > 0) { - if (i + 1 < static_cast(size)) - group_asts[i] = std::move(group_asts.back()); + /// Constant expressions have non-null column pointer at this stage. + if (node->column && isColumnConst(*node->column)) + { + select_query->group_by_with_constant_keys = true; - group_asts.pop_back(); + /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work. + if (!aggregate_descriptions.empty() || group_size > 1) + { + if (j + 1 < static_cast(group_size)) + group_elements_ast[j] = std::move(group_elements_ast.back()); - --i; - continue; + group_elements_ast.pop_back(); + + --j; + continue; + } + } + } + + NameAndTypePair key{column_name, node->result_type}; + + grouping_set_list.push_back(key); + + /// Aggregation keys are unique. + if (!unique_keys.contains(key.name)) + { + unique_keys.insert(key.name); + aggregation_keys.push_back(key); + + /// Key is no longer needed, therefore we can save a little by moving it. + aggregated_columns.push_back(std::move(key)); } } + + aggregation_keys_list.push_back(std::move(grouping_set_list)); } - - NameAndTypePair key{column_name, node->result_type}; - - /// Aggregation keys are uniqued. - if (!unique_keys.contains(key.name)) + else { - unique_keys.insert(key.name); - aggregation_keys.push_back(key); + getRootActionsNoMakeSet(group_asts[i], temp_actions, false); - /// Key is no longer needed, therefore we can save a little by moving it. 
- aggregated_columns.push_back(std::move(key)); + const auto & column_name = group_asts[i]->getColumnName(); + const auto * node = temp_actions->tryFindInIndex(column_name); + if (!node) + throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); + + /// Only removes constant keys if it's an initiator or distributed_group_by_no_merge is enabled. + if (getContext()->getClientInfo().distributed_depth == 0 || settings.distributed_group_by_no_merge > 0) + { + /// Constant expressions have non-null column pointer at this stage. + if (node->column && isColumnConst(*node->column)) + { + select_query->group_by_with_constant_keys = true; + + /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work. + if (!aggregate_descriptions.empty() || size > 1) + { + if (i + 1 < static_cast(size)) + group_asts[i] = std::move(group_asts.back()); + + group_asts.pop_back(); + + --i; + continue; + } + } + } + + NameAndTypePair key{column_name, node->result_type}; + + /// Aggregation keys are uniqued. + if (!unique_keys.contains(key.name)) + { + unique_keys.insert(key.name); + aggregation_keys.push_back(key); + + /// Key is no longer needed, therefore we can save a little by moving it. 
+ aggregated_columns.push_back(std::move(key)); + } } } @@ -1169,10 +1236,24 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain ExpressionActionsChain::Step & step = chain.lastStep(columns_after_join); ASTs asts = select_query->groupBy()->children; - for (const auto & ast : asts) + if (select_query->group_by_with_grouping_sets) { - step.addRequiredOutput(ast->getColumnName()); - getRootActions(ast, only_types, step.actions()); + for (const auto & ast : asts) + { + for (const auto & ast_element : ast->children) + { + step.addRequiredOutput(ast_element->getColumnName()); + getRootActions(ast_element, only_types, step.actions()); + } + } + } + else + { + for (const auto & ast : asts) + { + step.addRequiredOutput(ast->getColumnName()); + getRootActions(ast, only_types, step.actions()); + } } if (optimize_aggregation_in_order) @@ -1584,6 +1665,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( , second_stage(second_stage_) , need_aggregate(query_analyzer.hasAggregation()) , has_window(query_analyzer.hasWindow()) + , use_grouping_set_key(query_analyzer.useGroupingSetKey()) { /// first_stage: Do I need to perform the first part of the pipeline - running on remote servers during distributed processing. /// second_stage: Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing. 
diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 0fbd9cdaac1..b3704095c92 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -64,6 +64,7 @@ struct ExpressionAnalyzerData bool has_aggregation = false; NamesAndTypesList aggregation_keys; + NamesAndTypesLists aggregation_keys_list; bool has_const_aggregation_keys = false; AggregateDescriptions aggregate_descriptions; @@ -221,6 +222,8 @@ struct ExpressionAnalysisResult bool optimize_aggregation_in_order = false; bool join_has_delayed_stream = false; + bool use_grouping_set_key = false; + ActionsDAGPtr before_array_join; ArrayJoinActionPtr array_join; ActionsDAGPtr before_join; @@ -321,8 +324,11 @@ public: bool hasGlobalSubqueries() { return has_global_subqueries; } bool hasTableJoin() const { return syntax->ast_join; } + bool useGroupingSetKey() const { return aggregation_keys_list.size() > 1; } + const NamesAndTypesList & aggregationKeys() const { return aggregation_keys; } bool hasConstAggregationKeys() const { return has_const_aggregation_keys; } + const NamesAndTypesLists & aggregationKeysList() const { return aggregation_keys_list; } const AggregateDescriptions & aggregates() const { return aggregate_descriptions; } std::unique_ptr getJoinedPlan(); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 848d69834d2..3b4f7eda24d 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -81,6 +81,8 @@ #include #include #include +#include +#include #include #include #include @@ -218,7 +220,7 @@ static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & table QueryAliasesNoSubqueriesVisitor(aliases).visit(select.select()); CrossToInnerJoinVisitor::Data cross_to_inner{tables, aliases, database}; - cross_to_inner.cross_to_inner_join_rewrite = settings.cross_to_inner_join_rewrite; + 
cross_to_inner.cross_to_inner_join_rewrite = static_cast(std::min(settings.cross_to_inner_join_rewrite, 2)); CrossToInnerJoinVisitor(cross_to_inner).visit(query); JoinToSubqueryTransformVisitor::Data join_to_subs_data{tables, aliases}; @@ -736,6 +738,9 @@ Block InterpreterSelectQuery::getSampleBlockImpl() Block res; + if (analysis_result.use_grouping_set_key) + res.insert({ nullptr, std::make_shared(), "__grouping_set" }); + for (const auto & key : query_analyzer->aggregationKeys()) res.insert({nullptr, header.getByName(key.name).type, key.name}); @@ -1082,6 +1087,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

QueryProcessingStage::WithMergeableState && !query.group_by_with_totals && !query.group_by_with_rollup && !query.group_by_with_cube; + bool use_grouping_set_key = expressions.use_grouping_set_key; + + if (query.group_by_with_grouping_sets && query.group_by_with_totals) + throw Exception("WITH TOTALS and GROUPING SETS are not supported together", ErrorCodes::NOT_IMPLEMENTED); + if (query_info.projection && query_info.projection->desc->type == ProjectionDescription::Type::Aggregate) { query_info.projection->aggregate_overflow_row = aggregate_overflow_row; @@ -1196,7 +1206,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

aggregate_overflow_row, query_info.projection->aggregate_final, false, + false, context_->getSettingsRef(), query_info.projection->aggregation_keys, query_info.projection->aggregate_descriptions); @@ -2165,7 +2178,6 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc } } - void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter) { auto where_step = std::make_unique( @@ -2175,6 +2187,80 @@ void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsD query_plan.addStep(std::move(where_step)); } +static Aggregator::Params getAggregatorParams( + const ASTPtr & query_ptr, + const SelectQueryExpressionAnalyzer & query_analyzer, + const Context & context, + const Block & current_data_stream_header, + const ColumnNumbers & keys, + const AggregateDescriptions & aggregates, + bool overflow_row, const Settings & settings, + size_t group_by_two_level_threshold, size_t group_by_two_level_threshold_bytes) +{ + const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams( + query_ptr, + settings.collect_hash_table_stats_during_aggregation, + settings.max_entries_for_hash_table_stats, + settings.max_size_to_preallocate_for_aggregation); + + return Aggregator::Params{ + current_data_stream_header, + keys, + aggregates, + overflow_row, + settings.max_rows_to_group_by, + settings.group_by_overflow_mode, + group_by_two_level_threshold, + group_by_two_level_threshold_bytes, + settings.max_bytes_before_external_group_by, + settings.empty_result_for_aggregation_by_empty_set + || (settings.empty_result_for_aggregation_by_constant_keys_on_empty_set && keys.empty() + && query_analyzer.hasConstAggregationKeys()), + context.getTemporaryVolume(), + settings.max_threads, + settings.min_free_disk_space_for_temporary_data, + settings.compile_aggregate_expressions, + settings.min_count_to_compile_aggregate_expression, + Block{}, + stats_collecting_params + }; +} + 
+static GroupingSetsParamsList getAggregatorGroupingSetsParams( + const SelectQueryExpressionAnalyzer & query_analyzer, + const Block & header_before_aggregation, + const ColumnNumbers & all_keys +) +{ + GroupingSetsParamsList result; + if (query_analyzer.useGroupingSetKey()) + { + auto const & aggregation_keys_list = query_analyzer.aggregationKeysList(); + + ColumnNumbersList grouping_sets_with_keys; + ColumnNumbersList missing_columns_per_set; + + for (const auto & aggregation_keys : aggregation_keys_list) + { + ColumnNumbers keys; + std::unordered_set keys_set; + for (const auto & key : aggregation_keys) + { + keys.push_back(header_before_aggregation.getPositionByName(key.name)); + keys_set.insert(keys.back()); + } + + ColumnNumbers missing_indexes; + for (size_t i = 0; i < all_keys.size(); ++i) + { + if (!keys_set.contains(all_keys[i])) + missing_indexes.push_back(i); + } + result.emplace_back(std::move(keys), std::move(missing_indexes)); + } + } + return result; +} void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info) { @@ -2186,9 +2272,6 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac return; const auto & header_before_aggregation = query_plan.getCurrentDataStream().header; - ColumnNumbers keys; - for (const auto & key : query_analyzer->aggregationKeys()) - keys.push_back(header_before_aggregation.getPositionByName(key.name)); AggregateDescriptions aggregates = query_analyzer->aggregates(); for (auto & descr : aggregates) @@ -2198,32 +2281,14 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac const Settings & settings = context->getSettingsRef(); - const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams( - query_ptr, - settings.collect_hash_table_stats_during_aggregation, - settings.max_entries_for_hash_table_stats, - 
settings.max_size_to_preallocate_for_aggregation); + ColumnNumbers keys; + for (const auto & key : query_analyzer->aggregationKeys()) + keys.push_back(header_before_aggregation.getPositionByName(key.name)); - Aggregator::Params params( - header_before_aggregation, - keys, - aggregates, - overflow_row, - settings.max_rows_to_group_by, - settings.group_by_overflow_mode, - settings.group_by_two_level_threshold, - settings.group_by_two_level_threshold_bytes, - settings.max_bytes_before_external_group_by, - settings.empty_result_for_aggregation_by_empty_set - || (settings.empty_result_for_aggregation_by_constant_keys_on_empty_set && keys.empty() - && query_analyzer->hasConstAggregationKeys()), - context->getTemporaryVolume(), - settings.max_threads, - settings.min_free_disk_space_for_temporary_data, - settings.compile_aggregate_expressions, - settings.min_count_to_compile_aggregate_expression, - Block{}, - stats_collecting_params); + auto aggregator_params = getAggregatorParams(query_ptr, *query_analyzer, *context, header_before_aggregation, keys, aggregates, overflow_row, settings, + settings.group_by_two_level_threshold, settings.group_by_two_level_threshold_bytes); + + auto grouping_sets_params = getAggregatorGroupingSetsParams(*query_analyzer, header_before_aggregation, keys); SortDescription group_by_sort_description; @@ -2241,7 +2306,8 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac auto aggregating_step = std::make_unique( query_plan.getCurrentDataStream(), - params, + std::move(aggregator_params), + std::move(grouping_sets_params), final, settings.max_block_size, settings.aggregation_in_order_max_block_bytes, @@ -2250,11 +2316,10 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac storage_has_evenly_distributed_read, std::move(group_by_info), std::move(group_by_sort_description)); - query_plan.addStep(std::move(aggregating_step)); } -void InterpreterSelectQuery::executeMergeAggregated(QueryPlan 
& query_plan, bool overflow_row, bool final) +void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final, bool has_grouping_sets) { /// If aggregate projection was chosen for table, avoid adding MergeAggregated. /// It is already added by storage (because of performance issues). @@ -2268,6 +2333,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, final, storage && storage->isRemote(), + has_grouping_sets, context->getSettingsRef(), query_analyzer->aggregationKeys(), query_analyzer->aggregates()); @@ -2302,47 +2368,28 @@ void InterpreterSelectQuery::executeTotalsAndHaving( query_plan.addStep(std::move(totals_having_step)); } - void InterpreterSelectQuery::executeRollupOrCube(QueryPlan & query_plan, Modificator modificator) { const auto & header_before_transform = query_plan.getCurrentDataStream().header; - ColumnNumbers keys; + const Settings & settings = context->getSettingsRef(); + ColumnNumbers keys; for (const auto & key : query_analyzer->aggregationKeys()) keys.push_back(header_before_transform.getPositionByName(key.name)); - const Settings & settings = context->getSettingsRef(); - - Aggregator::Params params( - header_before_transform, - keys, - query_analyzer->aggregates(), - false, - settings.max_rows_to_group_by, - settings.group_by_overflow_mode, - 0, - 0, - settings.max_bytes_before_external_group_by, - settings.empty_result_for_aggregation_by_empty_set, - context->getTemporaryVolume(), - settings.max_threads, - settings.min_free_disk_space_for_temporary_data, - settings.compile_aggregate_expressions, - settings.min_count_to_compile_aggregate_expression); - - auto transform_params = std::make_shared(params, true); + auto params = getAggregatorParams(query_ptr, *query_analyzer, *context, header_before_transform, keys, query_analyzer->aggregates(), false, settings, 0, 0); + auto transform_params = std::make_shared(std::move(params), true); QueryPlanStepPtr step; 
if (modificator == Modificator::ROLLUP) step = std::make_unique(query_plan.getCurrentDataStream(), std::move(transform_params)); - else + else if (modificator == Modificator::CUBE) step = std::make_unique(query_plan.getCurrentDataStream(), std::move(transform_params)); query_plan.addStep(std::move(step)); } - void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description) { if (!expression) diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index bff2f6c4f90..3adbcad909c 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -27,6 +27,9 @@ class InterpreterSelectWithUnionQuery; class Context; class QueryPlan; +struct GroupingSetsParams; +using GroupingSetsParamsList = std::vector; + struct TreeRewriterResult; using TreeRewriterResultPtr = std::shared_ptr; @@ -140,12 +143,11 @@ private: void executeImpl(QueryPlan & query_plan, std::optional prepared_pipe); /// Different stages of query execution. 
- void executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan); void executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter); void executeAggregation( QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info); - void executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final); + void executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final, bool has_grouping_sets); void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final); void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter); static void executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description); @@ -171,7 +173,7 @@ private: enum class Modificator { ROLLUP = 0, - CUBE = 1 + CUBE = 1, }; void executeRollupOrCube(QueryPlan & query_plan, Modificator modificator); diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 19041a19aa4..8c11b9dc9d0 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -210,10 +210,22 @@ GroupByKeysInfo getGroupByKeysInfo(const ASTs & group_by_keys) /// filling set with short names of keys for (const auto & group_key : group_by_keys) { - if (group_key->as()) - data.has_function = true; + /// for grouping sets case + if (group_key->as()) + { + const auto express_list_ast = group_key->as(); + for (const auto & group_elem : express_list_ast.children) + { + data.key_names.insert(group_elem->getColumnName()); + } + } + else + { + if (group_key->as()) + data.has_function = true; - data.key_names.insert(group_key->getColumnName()); + data.key_names.insert(group_key->getColumnName()); + } } return data; diff --git a/src/Parsers/ASTExpressionList.cpp 
b/src/Parsers/ASTExpressionList.cpp index 2724465537f..2590c6b2941 100644 --- a/src/Parsers/ASTExpressionList.cpp +++ b/src/Parsers/ASTExpressionList.cpp @@ -26,7 +26,15 @@ void ASTExpressionList::formatImpl(const FormatSettings & settings, FormatState settings.ostr << ' '; } - (*it)->formatImpl(settings, state, frame); + if (frame.surround_each_list_element_with_parens) + settings.ostr << "("; + + FormatStateStacked frame_nested = frame; + frame_nested.surround_each_list_element_with_parens = false; + (*it)->formatImpl(settings, state, frame_nested); + + if (frame.surround_each_list_element_with_parens) + settings.ostr << ")"; } } @@ -41,6 +49,7 @@ void ASTExpressionList::formatImplMultiline(const FormatSettings & settings, For } ++frame.indent; + for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) { if (it != children.begin()) @@ -54,7 +63,15 @@ void ASTExpressionList::formatImplMultiline(const FormatSettings & settings, For FormatStateStacked frame_nested = frame; frame_nested.expression_list_always_start_on_new_line = false; + frame_nested.surround_each_list_element_with_parens = false; + + if (frame.surround_each_list_element_with_parens) + settings.ostr << "("; + (*it)->formatImpl(settings, state, frame_nested); + + if (frame.surround_each_list_element_with_parens) + settings.ostr << ")"; } } diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 43186056077..4408fd21465 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -96,9 +96,12 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F if (groupBy()) { s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "GROUP BY" << (s.hilite ? hilite_none : ""); - s.one_line + if (!group_by_with_grouping_sets) + { + s.one_line ? 
groupBy()->formatImpl(s, state, frame) : groupBy()->as().formatImplMultiline(s, state, frame); + } } if (group_by_with_rollup) @@ -107,6 +110,18 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F if (group_by_with_cube) s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH CUBE" << (s.hilite ? hilite_none : ""); + if (group_by_with_grouping_sets) + { + frame.surround_each_list_element_with_parens = true; + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "GROUPING SETS" << (s.hilite ? hilite_none : ""); + s.ostr << " ("; + s.one_line + ? groupBy()->formatImpl(s, state, frame) + : groupBy()->as().formatImplMultiline(s, state, frame); + s.ostr << ")"; + frame.surround_each_list_element_with_parens = false; + } + if (group_by_with_totals) s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH TOTALS" << (s.hilite ? 
hilite_none : ""); diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 4a30d6afee3..704aeeeea7c 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -86,6 +86,7 @@ public: bool group_by_with_rollup = false; bool group_by_with_cube = false; bool group_by_with_constant_keys = false; + bool group_by_with_grouping_sets = false; bool limit_with_ties = false; ASTPtr & refSelect() { return getExpression(Expression::SELECT); } diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 8f5d1fa4dda..caf9be1fea6 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -755,13 +755,61 @@ bool ParserNotEmptyExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected return nested_parser.parse(pos, node, expected) && !node->children.empty(); } - bool ParserOrderByExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) .parse(pos, node, expected); } +bool ParserGroupingSetsExpressionListElements::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto command_list = std::make_shared(); + node = command_list; + + ParserToken s_comma(TokenType::Comma); + ParserToken s_open(TokenType::OpeningRoundBracket); + ParserToken s_close(TokenType::ClosingRoundBracket); + ParserExpressionWithOptionalAlias p_expression(false); + ParserList p_command(std::make_unique(false), + std::make_unique(TokenType::Comma), true); + + do + { + Pos begin = pos; + ASTPtr command; + if (!s_open.ignore(pos, expected)) + { + pos = begin; + if (!p_expression.parse(pos, command, expected)) + { + return false; + } + auto list = std::make_shared(','); + list->children.push_back(command); + command = std::move(list); + } + else + { + if (!p_command.parse(pos, command, expected)) + return false; + + if (!s_close.ignore(pos, expected)) + break; + } + + 
command_list->children.push_back(command); + } + while (s_comma.ignore(pos, expected)); + + return true; +} + +bool ParserGroupingSetsExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserGroupingSetsExpressionListElements grouping_sets_elements; + return grouping_sets_elements.parse(pos, node, expected); + +} bool ParserInterpolateExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 2325433a00a..2b127dc2607 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -517,6 +517,20 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserGroupingSetsExpressionList : public IParserBase +{ +protected: + const char * getName() const override { return "grouping sets expression"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +class ParserGroupingSetsExpressionListElements : public IParserBase +{ +protected: + const char * getName() const override { return "grouping sets expression elements"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + class ParserInterpolateExpressionList : public IParserBase { protected: diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index bd8167c64fe..b73919f4f36 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -224,6 +224,7 @@ public: bool need_parens = false; bool expression_list_always_start_on_new_line = false; /// Line feed and indent before expression list even if it's of single element. 
bool expression_list_prepend_whitespace = false; /// Prepend whitespace (if it is required) + bool surround_each_list_element_with_parens = false; const IAST * current_select = nullptr; }; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 4e9d5c1d57d..66428b144bf 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -54,6 +54,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_by("BY"); ParserKeyword s_rollup("ROLLUP"); ParserKeyword s_cube("CUBE"); + ParserKeyword s_grouping_sets("GROUPING SETS"); ParserKeyword s_top("TOP"); ParserKeyword s_with_ties("WITH TIES"); ParserKeyword s_offset("OFFSET"); @@ -70,6 +71,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserNotEmptyExpressionList exp_list_for_select_clause(true); /// Allows aliases without AS keyword. ParserExpressionWithOptionalAlias exp_elem(false); ParserOrderByExpressionList order_list; + ParserGroupingSetsExpressionList grouping_sets_list; ParserInterpolateExpressionList interpolate_list; ParserToken open_bracket(TokenType::OpeningRoundBracket); @@ -191,24 +193,39 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) select_query->group_by_with_rollup = true; else if (s_cube.ignore(pos, expected)) select_query->group_by_with_cube = true; + else if (s_grouping_sets.ignore(pos, expected)) + select_query->group_by_with_grouping_sets = true; - if ((select_query->group_by_with_rollup || select_query->group_by_with_cube) && !open_bracket.ignore(pos, expected)) + if ((select_query->group_by_with_rollup || select_query->group_by_with_cube || select_query->group_by_with_grouping_sets) && + !open_bracket.ignore(pos, expected)) return false; - if (!exp_list.parse(pos, group_expression_list, expected)) - return false; + if (select_query->group_by_with_grouping_sets) + { + if (!grouping_sets_list.parse(pos, 
group_expression_list, expected)) + return false; + } + else + { + if (!exp_list.parse(pos, group_expression_list, expected)) + return false; + } - if ((select_query->group_by_with_rollup || select_query->group_by_with_cube) && !close_bracket.ignore(pos, expected)) + + if ((select_query->group_by_with_rollup || select_query->group_by_with_cube || select_query->group_by_with_grouping_sets) && + !close_bracket.ignore(pos, expected)) return false; } - /// WITH ROLLUP, CUBE or TOTALS + /// WITH ROLLUP, CUBE, GROUPING SETS or TOTALS if (s_with.ignore(pos, expected)) { if (s_rollup.ignore(pos, expected)) select_query->group_by_with_rollup = true; else if (s_cube.ignore(pos, expected)) select_query->group_by_with_cube = true; + else if (s_grouping_sets.ignore(pos, expected)) + select_query->group_by_with_grouping_sets = true; else if (s_totals.ignore(pos, expected)) select_query->group_by_with_totals = true; else diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 043e4f1e724..3f6a36e8e8c 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -717,9 +717,7 @@ namespace DB column.type = recursiveRemoveLowCardinality(column.type); column.column = recursiveRemoveLowCardinality(column.column); } - bool is_column_nullable = false; - auto arrow_type = getArrowType(column.type, column.column, column.name, format_name, &is_column_nullable); - arrow_fields.emplace_back(std::make_shared(column.name, arrow_type, is_column_nullable)); + header_columns.emplace_back(std::move(column)); } } @@ -740,6 +738,13 @@ namespace DB if (!low_cardinality_as_dictionary) column = recursiveRemoveLowCardinality(column); + if (!is_arrow_fields_initialized) + { + bool is_column_nullable = false; + auto arrow_type = getArrowType(header_column.type, column, header_column.name, format_name, &is_column_nullable); + 
arrow_fields.emplace_back(std::make_shared(header_column.name, arrow_type, is_column_nullable)); + } + arrow::MemoryPool* pool = arrow::default_memory_pool(); std::unique_ptr array_builder; arrow::Status status = MakeBuilder(pool, arrow_fields[column_i]->type(), &array_builder); @@ -757,6 +762,7 @@ namespace DB std::shared_ptr arrow_schema = std::make_shared(arrow_fields); res = arrow::Table::Make(arrow_schema, arrow_arrays); + is_arrow_fields_initialized = true; } } diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.h b/src/Processors/Formats/Impl/CHColumnToArrowColumn.h index 1fb2a8af65e..50de8045d5f 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.h +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.h @@ -27,6 +27,11 @@ private: /// To avoid converting dictionary from LowCardinality to Arrow /// Dictionary every chunk we save it and reuse. std::unordered_map> dictionary_values; + + /// We should initialize arrow fields on first call of chChunkToArrowTable, not in constructor + /// because LowCardinality column from header always has indexes type UInt8, so, we should get + /// proper indexes type from first chunk of data. 
+ bool is_arrow_fields_initialized = false; }; } diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index e617f9be72c..d7d62d07d92 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -1,10 +1,18 @@ +#include +#include +#include #include #include +#include #include #include #include +#include #include #include +#include +#include +#include namespace DB { @@ -25,9 +33,28 @@ static ITransformingStep::Traits getTraits() }; } +static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & params) +{ + if (params.empty()) + return block; + + Block res; + + size_t rows = block.rows(); + auto column = ColumnUInt64::create(rows); + + res.insert({ColumnPtr(std::move(column)), std::make_shared(), "__grouping_set"}); + + for (auto & col : block) + res.insert(std::move(col)); + + return res; +} + AggregatingStep::AggregatingStep( const DataStream & input_stream_, Aggregator::Params params_, + GroupingSetsParamsList grouping_sets_params_, bool final_, size_t max_block_size_, size_t aggregation_in_order_max_block_bytes_, @@ -36,8 +63,9 @@ AggregatingStep::AggregatingStep( bool storage_has_evenly_distributed_read_, InputOrderInfoPtr group_by_info_, SortDescription group_by_sort_description_) - : ITransformingStep(input_stream_, params_.getHeader(final_), getTraits(), false) + : ITransformingStep(input_stream_, appendGroupingColumn(params_.getHeader(final_), grouping_sets_params_), getTraits(), false) , params(std::move(params_)) + , grouping_sets_params(std::move(grouping_sets_params_)) , final(std::move(final_)) , max_block_size(max_block_size_) , aggregation_in_order_max_block_bytes(aggregation_in_order_max_block_bytes_) @@ -49,7 +77,7 @@ AggregatingStep::AggregatingStep( { } -void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +void AggregatingStep::transformPipeline(QueryPipelineBuilder & 
pipeline, const BuildQueryPipelineSettings & settings) { QueryPipelineProcessorsCollector collector(pipeline, this); @@ -80,6 +108,150 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B */ auto transform_params = std::make_shared(std::move(params), final); + if (!grouping_sets_params.empty()) + { + const size_t grouping_sets_size = grouping_sets_params.size(); + + const size_t streams = pipeline.getNumStreams(); + + auto input_header = pipeline.getHeader(); + pipeline.transform([&](OutputPortRawPtrs ports) + { + Processors copiers; + copiers.reserve(ports.size()); + + for (auto * port : ports) + { + auto copier = std::make_shared(input_header, grouping_sets_size); + connect(*port, copier->getInputPort()); + copiers.push_back(copier); + } + + return copiers; + }); + + pipeline.transform([&](OutputPortRawPtrs ports) + { + assert(streams * grouping_sets_size == ports.size()); + Processors processors; + for (size_t i = 0; i < grouping_sets_size; ++i) + { + Aggregator::Params params_for_set + { + transform_params->params.src_header, + grouping_sets_params[i].used_keys, + transform_params->params.aggregates, + transform_params->params.overflow_row, + transform_params->params.max_rows_to_group_by, + transform_params->params.group_by_overflow_mode, + transform_params->params.group_by_two_level_threshold, + transform_params->params.group_by_two_level_threshold_bytes, + transform_params->params.max_bytes_before_external_group_by, + transform_params->params.empty_result_for_aggregation_by_empty_set, + transform_params->params.tmp_volume, + transform_params->params.max_threads, + transform_params->params.min_free_disk_space, + transform_params->params.compile_aggregate_expressions, + transform_params->params.min_count_to_compile_aggregate_expression, + transform_params->params.intermediate_header, + transform_params->params.stats_collecting_params + }; + auto transform_params_for_set = std::make_shared(std::move(params_for_set), final); + + if 
(streams > 1) + { + auto many_data = std::make_shared(streams); + for (size_t j = 0; j < streams; ++j) + { + auto aggregation_for_set = std::make_shared(input_header, transform_params_for_set, many_data, j, merge_threads, temporary_data_merge_threads); + // For each input stream we have `grouping_sets_size` copies, so port index + // for transform #j should skip ports of first (j-1) streams. + connect(*ports[i + grouping_sets_size * j], aggregation_for_set->getInputs().front()); + ports[i + grouping_sets_size * j] = &aggregation_for_set->getOutputs().front(); + processors.push_back(aggregation_for_set); + } + } + else + { + auto aggregation_for_set = std::make_shared(input_header, transform_params_for_set); + connect(*ports[i], aggregation_for_set->getInputs().front()); + ports[i] = &aggregation_for_set->getOutputs().front(); + processors.push_back(aggregation_for_set); + } + } + + if (streams > 1) + { + OutputPortRawPtrs new_ports; + new_ports.reserve(grouping_sets_size); + + for (size_t i = 0; i < grouping_sets_size; ++i) + { + size_t output_it = i; + auto resize = std::make_shared(ports[output_it]->getHeader(), streams, 1); + auto & inputs = resize->getInputs(); + + for (auto input_it = inputs.begin(); input_it != inputs.end(); output_it += grouping_sets_size, ++input_it) + connect(*ports[output_it], *input_it); + new_ports.push_back(&resize->getOutputs().front()); + processors.push_back(resize); + } + + ports.swap(new_ports); + } + + assert(ports.size() == grouping_sets_size); + auto output_header = transform_params->getHeader(); + + for (size_t set_counter = 0; set_counter < grouping_sets_size; ++set_counter) + { + auto & header = ports[set_counter]->getHeader(); + + /// Here we create a DAG which fills missing keys and adds `__grouping_set` column + auto dag = std::make_shared(header.getColumnsWithTypeAndName()); + ActionsDAG::NodeRawConstPtrs index; + index.reserve(output_header.columns() + 1); + + auto grouping_col = 
ColumnConst::create(ColumnUInt64::create(1, set_counter), 0); + const auto * grouping_node = &dag->addColumn( + {ColumnPtr(std::move(grouping_col)), std::make_shared(), "__grouping_set"}); + + grouping_node = &dag->materializeNode(*grouping_node); + index.push_back(grouping_node); + + size_t missign_column_index = 0; + const auto & missing_columns = grouping_sets_params[set_counter].missing_keys; + + for (size_t i = 0; i < output_header.columns(); ++i) + { + auto & col = output_header.getByPosition(i); + if (missign_column_index < missing_columns.size() && missing_columns[missign_column_index] == i) + { + ++missign_column_index; + auto column = ColumnConst::create(col.column->cloneResized(1), 0); + const auto * node = &dag->addColumn({ColumnPtr(std::move(column)), col.type, col.name}); + node = &dag->materializeNode(*node); + index.push_back(node); + } + else + index.push_back(dag->getIndex()[header.getPositionByName(col.name)]); + } + + dag->getIndex().swap(index); + auto expression = std::make_shared(dag, settings.getActionsSettings()); + auto transform = std::make_shared(header, expression); + + connect(*ports[set_counter], transform->getInputPort()); + processors.emplace_back(std::move(transform)); + } + + return processors; + }); + + aggregating = collector.detachProcessors(0); + return; + } + if (group_by_info) { if (pipeline.getNumStreams() > 1) diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index 154ff0abc0a..b933daaa474 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -10,6 +10,21 @@ namespace DB struct AggregatingTransformParams; using AggregatingTransformParamsPtr = std::shared_ptr; +struct GroupingSetsParams +{ + GroupingSetsParams() = default; + + GroupingSetsParams(ColumnNumbers used_keys_, ColumnNumbers missing_keys_) + : used_keys(std::move(used_keys_)) + , missing_keys(std::move(missing_keys_)) + {} + + ColumnNumbers used_keys; + 
ColumnNumbers missing_keys; +}; + +using GroupingSetsParamsList = std::vector; + /// Aggregation. See AggregatingTransform. class AggregatingStep : public ITransformingStep { @@ -17,6 +32,7 @@ public: AggregatingStep( const DataStream & input_stream_, Aggregator::Params params_, + GroupingSetsParamsList grouping_sets_params_, bool final_, size_t max_block_size_, size_t aggregation_in_order_max_block_bytes_, @@ -39,6 +55,7 @@ public: private: Aggregator::Params params; + GroupingSetsParamsList grouping_sets_params; bool final; size_t max_block_size; size_t aggregation_in_order_max_block_bytes; @@ -55,7 +72,6 @@ private: Processors finalizing; Processors aggregating; - }; } diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index 67faaefbf66..19009d9692a 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -509,7 +509,7 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort for (size_t i = 1; i < output_ports.size(); ++i) assertBlocksHaveEqualStructure(header, output_ports[i]->getHeader(), "Pipes"); - // Temporarily skip this check. TotaslHavingTransform may return finalized totals but not finalized data. + // Temporarily skip this check. TotalsHavingTransform may return finalized totals but not finalized data. 
// if (totals_port) // assertBlocksHaveEqualStructure(header, totals_port->getHeader(), "Pipes"); diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 1f8d72b68d7..f06831f191e 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -201,11 +201,10 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr if (!cluster_name.empty()) { /// Use an existing cluster from the main config - String cluster_name_expanded = context->getMacros()->expand(cluster_name); if (name != "clusterAllReplicas") - cluster = context->getCluster(cluster_name_expanded); + cluster = context->getCluster(cluster_name); else - cluster = context->getCluster(cluster_name_expanded)->getClusterWithReplicasAsShards(context->getSettingsRef()); + cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); } else { diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index a4847f33d45..c9915c1c7f4 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -38,16 +38,17 @@ CI_CONFIG = { "tidy": "disable", "with_coverage": False, }, - "binary_gcc": { - "compiler": "gcc-11", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": False, - }, + # FIXME update to gcc-12 and turn on + # "binary_gcc": { + # "compiler": "gcc-11", + # "build_type": "", + # "sanitizer": "", + # "package_type": "binary", + # "bundled": "bundled", + # "splitted": "unsplitted", + # "tidy": "disable", + # "with_coverage": False, + # }, "package_aarch64": { "compiler": "clang-13-aarch64", "build_type": "", @@ -217,7 +218,7 @@ CI_CONFIG = { "binary_freebsd", "binary_darwin_aarch64", "binary_ppc64le", - "binary_gcc", + # "binary_gcc", ], }, "tests_config": { @@ -323,9 +324,9 @@ CI_CONFIG = { "Testflows check (actions)": { "required_build": 
"package_release", }, - "Unit tests (release-gcc, actions)": { - "required_build": "binary_gcc", - }, + # "Unit tests (release-gcc, actions)": { + # "required_build": "binary_gcc", + # }, "Unit tests (release-clang, actions)": { "required_build": "binary_release", }, diff --git a/tests/ci/git_helper.py b/tests/ci/git_helper.py index 0921ace35f6..e3ad0eb39c0 100644 --- a/tests/ci/git_helper.py +++ b/tests/ci/git_helper.py @@ -52,11 +52,11 @@ class Runner: def __init__(self, cwd: str = CWD): self._cwd = cwd - def run(self, cmd: str, cwd: Optional[str] = None) -> str: + def run(self, cmd: str, cwd: Optional[str] = None, **kwargs) -> str: if cwd is None: cwd = self.cwd return subprocess.check_output( - cmd, shell=True, cwd=cwd, encoding="utf-8" + cmd, shell=True, cwd=cwd, encoding="utf-8", **kwargs ).strip() @property @@ -78,8 +78,12 @@ git_runner.cwd = p.relpath( ) +def is_shallow() -> bool: + return git_runner.run("git rev-parse --is-shallow-repository") == "true" + + def get_tags() -> List[str]: - if git_runner.run("git rev-parse --is-shallow-repository") == "true": + if is_shallow(): raise RuntimeError("attempt to run on a shallow repository") return git_runner.run("git tag").split() @@ -110,10 +114,7 @@ class Git: self.sha_short = self.sha[:11] # The following command shows the most recent tag in a graph # Format should match TAG_REGEXP - if ( - self._ignore_no_tags - and self.run("git rev-parse --is-shallow-repository") == "true" - ): + if self._ignore_no_tags and is_shallow(): try: self._update_tags() except subprocess.CalledProcessError: diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 517a94544f9..cf0dfe51e9b 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -210,22 +210,29 @@ if __name__ == "__main__": json_path = os.path.join(work_path, "params.json") with open(json_path, "w", encoding="utf-8") as json_params: - json_params.write( - json.dumps( - get_json_params_dict( - 
check_name, - pr_info, - images_with_versions, - run_by_hash_total, - run_by_hash_num, - ) + params_text = json.dumps( + get_json_params_dict( + check_name, + pr_info, + images_with_versions, + run_by_hash_total, + run_by_hash_num, ) ) + json_params.write(params_text) + logging.info("Parameters file %s is written: %s", json_path, params_text) output_path_log = os.path.join(result_path, "main_script_log.txt") runner_path = os.path.join(repo_path, "tests/integration", "ci-runner.py") run_command = f"sudo -E {runner_path} | tee {output_path_log}" + logging.info("Going to run command: `%s`", run_command) + logging.info( + "ENV parameters for runner:\n%s", + "\n".join( + [f"{k}={v}" for k, v in my_env.items() if k.startswith("CLICKHOUSE_")] + ), + ) with TeePopen(run_command, output_path_log, my_env) as process: retcode = process.wait() diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 687c88b04a3..20787786aa4 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -246,8 +246,7 @@ class ClickhouseIntegrationTestsRunner: return name + ":latest" return name - def get_single_image_version(self): - name = self.get_images_names()[0] + def get_image_version(self, name: str): if name in self.image_versions: return self.image_versions[name] logging.warn( @@ -473,7 +472,7 @@ class ClickhouseIntegrationTestsRunner: ): for img in self.get_images_names(): if img == "clickhouse/integration-tests-runner": - runner_version = self.get_single_image_version() + runner_version = self.get_image_version(img) logging.info( "Can run with custom docker image version %s", runner_version ) diff --git a/tests/integration/test_s3_cluster/configs/cluster.xml b/tests/integration/test_s3_cluster/configs/cluster.xml index 404a15b1273..18f15763633 100644 --- a/tests/integration/test_s3_cluster/configs/cluster.xml +++ b/tests/integration/test_s3_cluster/configs/cluster.xml @@ -21,4 +21,7 @@ + + cluster_simple + \ No newline at end of file 
diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index 93708acd49c..2cbb36fcf06 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -95,6 +95,29 @@ def test_count(started_cluster): assert TSV(pure_s3) == TSV(s3_distibuted) +def test_count_macro(started_cluster): + node = started_cluster.instances["s0_0_0"] + + s3_macro = node.query( + """ + SELECT count(*) from s3Cluster( + '{default_cluster_macro}', 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')""" + ) + # print(s3_distibuted) + s3_distibuted = node.query( + """ + SELECT count(*) from s3Cluster( + 'cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')""" + ) + # print(s3_distibuted) + + assert TSV(s3_macro) == TSV(s3_distibuted) + + def test_union_all(started_cluster): node = started_cluster.instances["s0_0_0"] pure_s3 = node.query( diff --git a/tests/integration/test_storage_hdfs/configs/macro.xml b/tests/integration/test_storage_hdfs/configs/macro.xml new file mode 100644 index 00000000000..c2e11b47a5e --- /dev/null +++ b/tests/integration/test_storage_hdfs/configs/macro.xml @@ -0,0 +1,5 @@ + + + test_cluster_two_shards + + \ No newline at end of file diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index bd9e61a9422..0490c0c1f0d 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -2,10 +2,13 @@ import os import pytest from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV from pyhdfs import HdfsClient cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance("node1", with_hdfs=True) +node1 = cluster.add_instance( + "node1", 
main_configs=["configs/macro.xml"], with_hdfs=True +) @pytest.fixture(scope="module") @@ -589,6 +592,22 @@ def test_cluster_join(started_cluster): assert "AMBIGUOUS_COLUMN_NAME" not in result +def test_cluster_macro(started_cluster): + with_macro = node1.query( + """ + SELECT id FROM hdfsCluster('{default_cluster_macro}', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') + """ + ) + + no_macro = node1.query( + """ + SELECT id FROM hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') + """ + ) + + assert TSV(with_macro) == TSV(no_macro) + + def test_virtual_columns_2(started_cluster): hdfs_api = started_cluster.hdfs_api diff --git a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.reference b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.reference index de22f396eae..3bf688404f8 100644 --- a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.reference +++ b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.reference @@ -1,4 +1,4 @@ -1 4999999950000000 1 1 -2 4999999950000000 1 1 -3 49999995000000 1 1 -4 49999995000000 1 1 +1 4999999950000000 2 +2 4999999950000000 2 +3 49999995000000 2 +4 49999995000000 2 diff --git a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql index 7be0ffbcc8e..001e758284f 100644 --- a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql +++ b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql @@ -1,11 +1,15 @@ drop table if exists t; create table t(n int, a Int64, s String) engine = MergeTree() order by a; -insert into t select 1, sum(number) as c, getSetting('max_threads') from numbers_mt(100000000); -insert into t select 2, sum(number) as c, getSetting('max_threads') from numbers_mt(100000000) group by 1; -insert into t select 3, sum(number) as c, getSetting('max_threads') from numbers_mt(10000000) group by 
3; -insert into t select 4, sum(number) as c, getSetting('max_threads') as mt from numbers_mt(10000000) group by mt; +set optimize_trivial_insert_select=1; -select n, a, s != '1', s = toString(getSetting('max_threads')) from t order by n; +-- due to aggregate functions, optimize_trivial_insert_select will not be applied +insert into t select 1, sum(number) as c, getSetting('max_threads') from numbers_mt(100000000) settings max_insert_threads=4, max_threads=2; +-- due to GROUP BY, optimize_trivial_insert_select will not be applied +insert into t select 2, sum(number) as c, getSetting('max_threads') from numbers_mt(100000000) group by 1 settings max_insert_threads=4, max_threads=2; +insert into t select 3, sum(number) as c, getSetting('max_threads') from numbers_mt(10000000) group by 3 settings max_insert_threads=4, max_threads=2; +insert into t select 4, sum(number) as c, getSetting('max_threads') as mt from numbers_mt(10000000) group by mt settings max_insert_threads=4, max_threads=2; + +select n, a, s from t order by n; drop table t; diff --git a/tests/queries/0_stateless/01883_grouping_sets_crash.reference b/tests/queries/0_stateless/01883_grouping_sets_crash.reference new file mode 100644 index 00000000000..4d9e967b766 --- /dev/null +++ b/tests/queries/0_stateless/01883_grouping_sets_crash.reference @@ -0,0 +1,209 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +SECOND QUERY: +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +THIRD QUERY: +\N 1 0 0 +\N 2 0 0 +\N 3 0 0 +\N 4 0 0 +\N 5 0 0 +\N 1 0 0 +\N 2 0 0 +\N 3 0 0 +\N 4 0 0 +\N 5 0 0 +\N 0 10 10 +\N 0 9 9 +\N 0 8 8 +\N 0 7 7 +\N 0 6 6 +\N 0 5 5 +\N 0 4 4 +\N 0 3 3 +\N 0 2 2 +\N 0 1 1 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 
0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +\N 0 0 0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +w\0\0ldworldwo\0l\0world +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 diff --git a/tests/queries/0_stateless/01883_grouping_sets_crash.sql b/tests/queries/0_stateless/01883_grouping_sets_crash.sql new file mode 100644 index 00000000000..cf56c8546ce --- /dev/null +++ b/tests/queries/0_stateless/01883_grouping_sets_crash.sql @@ -0,0 +1,99 @@ +DROP TABLE IF EXISTS grouping_sets; + +CREATE TABLE grouping_sets(fact_1_id Int32, fact_2_id Int32, fact_3_id Int32, fact_4_id Int32, sales_value Int32) ENGINE = Memory; + +INSERT INTO grouping_sets +SELECT + number % 2 + 1 AS fact_1_id, + number % 5 + 1 AS fact_2_id, + number % 10 + 1 AS fact_3_id, + number % 10 + 1 AS 
fact_4_id, + number % 100 AS sales_value +FROM system.numbers limit 1000; + +SELECT + fact_3_id, + fact_4_id +FROM grouping_sets +GROUP BY + GROUPING SETS ( + ('wo\0ldworldwo\0ldworld'), + (fact_3_id, fact_4_id)) +ORDER BY + fact_3_id, fact_4_id; + +SELECT 'SECOND QUERY:'; + +SELECT + fact_3_id, + fact_4_id +FROM grouping_sets +GROUP BY + GROUPING SETS ( + (fact_1_id, fact_2_id), + ((-9223372036854775808, NULL, (tuple(1.), (tuple(1.), 1048576), 65535))), + ((tuple(3.4028234663852886e38), (tuple(1024), -2147483647), NULL)), + (fact_3_id, fact_4_id)) +ORDER BY + (NULL, ('256', (tuple(NULL), NULL), NULL, NULL), NULL) ASC, + fact_1_id DESC NULLS FIRST, + fact_2_id DESC NULLS FIRST, + fact_4_id ASC; + +SELECT 'THIRD QUERY:'; + +SELECT + extractAllGroups(NULL, 'worldworldworldwo\0ldworldworldworldwo\0ld'), + fact_2_id, + fact_3_id, + fact_4_id +FROM grouping_sets +GROUP BY + GROUPING SETS ( + (sales_value), + (fact_1_id, fact_2_id), + ('wo\0ldworldwo\0ldworld'), + (fact_3_id, fact_4_id)) +ORDER BY + fact_1_id DESC NULLS LAST, + fact_1_id DESC NULLS FIRST, + fact_2_id ASC, + fact_3_id DESC NULLS FIRST, + fact_4_id ASC; + +SELECT fact_3_id +FROM grouping_sets +GROUP BY + GROUPING SETS ((fact_3_id, fact_4_id)) +ORDER BY fact_3_id ASC; + +-- Following two queries were fuzzed +SELECT 'w\0\0ldworldwo\0l\0world' +FROM grouping_sets +GROUP BY + GROUPING SETS ( + ( fact_4_id), + ( NULL), + ( fact_3_id, fact_4_id)) +ORDER BY + NULL ASC, + NULL DESC NULLS FIRST, + fact_3_id ASC, + fact_3_id ASC NULLS LAST, + 'wo\0ldworldwo\0ldworld' ASC NULLS LAST, + 'w\0\0ldworldwo\0l\0world' DESC NULLS FIRST, + 'wo\0ldworldwo\0ldworld' ASC, + NULL ASC NULLS FIRST, + fact_4_id DESC NULLS LAST; + +SELECT fact_3_id +FROM grouping_sets +GROUP BY + GROUPING SETS ( + ( 'wo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworld'), + ( NULL), + ( fact_4_id), + ( fact_3_id, fact_4_id)) +ORDER BY fact_3_id ASC NULLS FIRST; + +DROP TABLE IF EXISTS grouping_sets; \ No newline 
at end of file diff --git a/tests/queries/0_stateless/01883_with_grouping_sets.reference b/tests/queries/0_stateless/01883_with_grouping_sets.reference new file mode 100644 index 00000000000..a036ccb0796 --- /dev/null +++ b/tests/queries/0_stateless/01883_with_grouping_sets.reference @@ -0,0 +1,208 @@ +(Expression) +ExpressionTransform + (Sorting) + MergingSortedTransform 2 → 1 + MergeSortingTransform × 2 + LimitsCheckingTransform × 2 + PartialSortingTransform × 2 + (Expression) + ExpressionTransform × 2 + (Aggregating) + ExpressionTransform × 2 + AggregatingTransform × 2 + Copy 1 → 2 + (Expression) + ExpressionTransform + (SettingQuotaAndLimits) + (ReadFromStorage) + Memory 0 → 1 +1 0 1 4500 +1 0 3 4700 +1 0 5 4900 +1 0 7 5100 +1 0 9 5300 +1 1 0 4500 +1 2 0 5100 +1 3 0 4700 +1 4 0 5300 +1 5 0 4900 +2 0 2 4600 +2 0 4 4800 +2 0 6 5000 +2 0 8 5200 +2 0 10 5400 +2 1 0 5000 +2 2 0 4600 +2 3 0 5200 +2 4 0 4800 +2 5 0 5400 +0 0 1 1 4500 +0 0 2 2 4600 +0 0 3 3 4700 +0 0 4 4 4800 +0 0 5 5 4900 +0 0 6 6 5000 +0 0 7 7 5100 +0 0 8 8 5200 +0 0 9 9 5300 +0 0 10 10 5400 +1 1 0 0 4500 +1 2 0 0 5100 +1 3 0 0 4700 +1 4 0 0 5300 +1 5 0 0 4900 +2 1 0 0 5000 +2 2 0 0 4600 +2 3 0 0 5200 +2 4 0 0 4800 +2 5 0 0 5400 +0 0 0 49500 +0 0 1 4500 +0 0 2 4600 +0 0 3 4700 +0 0 4 4800 +0 0 5 4900 +0 0 6 5000 +0 0 7 5100 +0 0 8 5200 +0 0 9 5300 +0 0 10 5400 +1 1 0 4500 +1 2 0 5100 +1 3 0 4700 +1 4 0 5300 +1 5 0 4900 +2 1 0 5000 +2 2 0 4600 +2 3 0 5200 +2 4 0 4800 +2 5 0 5400 +(Expression) +ExpressionTransform + (Sorting) + MergingSortedTransform 2 → 1 + MergeSortingTransform × 2 + LimitsCheckingTransform × 2 + PartialSortingTransform × 2 + (Expression) + ExpressionTransform × 2 + (Aggregating) + ExpressionTransform × 2 + Resize × 2 3 → 1 + AggregatingTransform × 6 + Copy × 3 1 → 2 + (Expression) + ExpressionTransform × 3 + (SettingQuotaAndLimits) + (ReadFromStorage) + NumbersMt × 3 0 → 1 +4999500000 10000 +4999510000 10000 +4999520000 10000 +4999530000 10000 +4999540000 10000 +4999550000 10000 
+4999560000 10000 +4999570000 10000 +4999580000 10000 +4999590000 10000 +4999600000 10000 +4999610000 10000 +4999620000 10000 +4999630000 10000 +4999640000 10000 +4999650000 10000 +4999660000 10000 +4999670000 10000 +4999680000 10000 +4999690000 10000 +4999700000 10000 +4999710000 10000 +4999720000 10000 +4999730000 10000 +4999740000 10000 +4999750000 10000 +4999760000 10000 +4999770000 10000 +4999780000 10000 +4999790000 10000 +4999800000 10000 +4999810000 10000 +4999820000 10000 +4999830000 10000 +4999840000 10000 +4999850000 10000 +4999860000 10000 +4999870000 10000 +4999880000 10000 +4999890000 10000 +4999900000 10000 +4999910000 10000 +4999920000 10000 +4999930000 10000 +4999940000 10000 +4999950000 10000 +4999960000 10000 +4999970000 10000 +4999980000 10000 +4999990000 10000 +5000000000 10000 +5000010000 10000 +5000020000 10000 +5000030000 10000 +5000040000 10000 +5000050000 10000 +5000060000 10000 +5000070000 10000 +5000080000 10000 +5000090000 10000 +5000100000 10000 +5000110000 10000 +5000120000 10000 +5000130000 10000 +5000140000 10000 +5000150000 10000 +5000160000 10000 +5000170000 10000 +5000180000 10000 +5000190000 10000 +5000200000 10000 +5000210000 10000 +5000220000 10000 +5000230000 10000 +5000240000 10000 +5000250000 10000 +5000260000 10000 +5000270000 10000 +5000280000 10000 +5000290000 10000 +5000300000 10000 +5000310000 10000 +5000320000 10000 +5000330000 10000 +5000340000 10000 +5000350000 10000 +5000360000 10000 +5000370000 10000 +5000380000 10000 +5000390000 10000 +5000400000 10000 +5000410000 10000 +5000420000 10000 +5000430000 10000 +5000440000 10000 +5000450000 10000 +5000460000 10000 +5000470000 10000 +5000480000 10000 +5000490000 10000 +49999500000 100000 +49999600000 100000 +49999700000 100000 +49999800000 100000 +49999900000 100000 +50000000000 100000 +50000100000 100000 +50000200000 100000 +50000300000 100000 +50000400000 100000 diff --git a/tests/queries/0_stateless/01883_with_grouping_sets.sql 
b/tests/queries/0_stateless/01883_with_grouping_sets.sql new file mode 100644 index 00000000000..bf96248e10e --- /dev/null +++ b/tests/queries/0_stateless/01883_with_grouping_sets.sql @@ -0,0 +1,58 @@ +DROP TABLE IF EXISTS grouping_sets; + +CREATE TABLE grouping_sets(fact_1_id Int32, fact_2_id Int32, fact_3_id Int32, fact_4_id Int32, sales_value Int32) ENGINE = Memory; + +SELECT fact_1_id, fact_3_id, sum(sales_value), count() from grouping_sets GROUP BY GROUPING SETS(fact_1_id, fact_3_id) ORDER BY fact_1_id, fact_3_id; + +INSERT INTO grouping_sets +SELECT + number % 2 + 1 AS fact_1_id, + number % 5 + 1 AS fact_2_id, + number % 10 + 1 AS fact_3_id, + number % 10 + 1 AS fact_4_id, + number % 100 AS sales_value +FROM system.numbers limit 1000; + +EXPLAIN PIPELINE +SELECT fact_1_id, fact_2_id, fact_3_id, SUM(sales_value) AS sales_value from grouping_sets +GROUP BY GROUPING SETS ((fact_1_id, fact_2_id), (fact_1_id, fact_3_id)) +ORDER BY fact_1_id, fact_2_id, fact_3_id; + +SELECT fact_1_id, fact_2_id, fact_3_id, SUM(sales_value) AS sales_value from grouping_sets +GROUP BY GROUPING SETS ((fact_1_id, fact_2_id), (fact_1_id, fact_3_id)) +ORDER BY fact_1_id, fact_2_id, fact_3_id; + +SELECT fact_1_id, fact_2_id, fact_3_id, fact_4_id, SUM(sales_value) AS sales_value from grouping_sets +GROUP BY GROUPING SETS ((fact_1_id, fact_2_id), (fact_3_id, fact_4_id)) +ORDER BY fact_1_id, fact_2_id, fact_3_id, fact_4_id; + +SELECT fact_1_id, fact_2_id, fact_3_id, SUM(sales_value) AS sales_value from grouping_sets +GROUP BY GROUPING SETS ((fact_1_id, fact_2_id), (fact_3_id), ()) +ORDER BY fact_1_id, fact_2_id, fact_3_id; + +SELECT + fact_1_id, + fact_3_id, + SUM(sales_value) AS sales_value +FROM grouping_sets +GROUP BY grouping sets ((fact_1_id), (fact_1_id, fact_3_id)) WITH TOTALS +ORDER BY fact_1_id, fact_3_id; -- { serverError NOT_IMPLEMENTED } + +SELECT + fact_1_id, + fact_3_id, + SUM(sales_value) AS sales_value +FROM grouping_sets +GROUP BY grouping sets (fact_1_id, (fact_1_id, 
fact_3_id)) WITH TOTALS +ORDER BY fact_1_id, fact_3_id; -- { serverError NOT_IMPLEMENTED } + +DROP TABLE grouping_sets; + +EXPLAIN PIPELINE +SELECT SUM(number) as sum_value, count() AS count_value from numbers_mt(1000000) +GROUP BY GROUPING SETS ((number % 10), (number % 100)) +ORDER BY sum_value, count_value SETTINGS max_threads=3; + +SELECT SUM(number) as sum_value, count() AS count_value from numbers_mt(1000000) +GROUP BY GROUPING SETS ((number % 10), (number % 100)) +ORDER BY sum_value, count_value SETTINGS max_threads=3; diff --git a/tests/queries/0_stateless/02165_replicated_grouping_sets.reference b/tests/queries/0_stateless/02165_replicated_grouping_sets.reference new file mode 100644 index 00000000000..659cd98368d --- /dev/null +++ b/tests/queries/0_stateless/02165_replicated_grouping_sets.reference @@ -0,0 +1,13 @@ +0 0 3 2 +0 1 5 2 +0 0 6 3 +0 2 7 2 +1 0 9 3 +0 0 6 4 +0 1 10 4 +0 0 12 6 +0 2 14 4 +1 0 18 6 +0 6 4 +1 10 4 +2 14 4 diff --git a/tests/queries/0_stateless/02165_replicated_grouping_sets.sql b/tests/queries/0_stateless/02165_replicated_grouping_sets.sql new file mode 100644 index 00000000000..d92d92c3e72 --- /dev/null +++ b/tests/queries/0_stateless/02165_replicated_grouping_sets.sql @@ -0,0 +1,45 @@ +SELECT + k1, + k2, + SUM(number) AS sum_value, + count() AS count_value +FROM numbers(6) +GROUP BY + GROUPING SETS + ( + (number % 2 AS k1), + (number % 3 AS k2) + ) +ORDER BY + sum_value ASC, + count_value ASC; + +SELECT + k1, + k2, + SUM(number) AS sum_value, + count() AS count_value +FROM remote('127.0.0.{2,3}', numbers(6)) +GROUP BY + GROUPING SETS + ( + (number % 2 AS k1), + (number % 3 AS k2) + ) +ORDER BY + sum_value ASC, + count_value ASC; + +SELECT + k2, + SUM(number) AS sum_value, + count() AS count_value +FROM remote('127.0.0.{2,3}', numbers(6)) +GROUP BY + GROUPING SETS + ( + (number % 3 AS k2) + ) +ORDER BY + sum_value ASC, + count_value ASC; diff --git a/tests/queries/0_stateless/02286_function_wyhash.reference 
b/tests/queries/0_stateless/02286_function_wyhash.reference new file mode 100644 index 00000000000..6b861deb81e --- /dev/null +++ b/tests/queries/0_stateless/02286_function_wyhash.reference @@ -0,0 +1,4 @@ +\N +4808886099364463827 +10557035923789874751 +10561902096955922022 diff --git a/tests/queries/0_stateless/02286_function_wyhash.sql b/tests/queries/0_stateless/02286_function_wyhash.sql new file mode 100644 index 00000000000..3307821b465 --- /dev/null +++ b/tests/queries/0_stateless/02286_function_wyhash.sql @@ -0,0 +1,5 @@ +SELECT wyHash64(NULL); +SELECT wyHash64(''); +SELECT wyHash64(' '); +SELECT wyHash64('qwerty'); + diff --git a/tests/queries/0_stateless/02293_arrow_dictionary_indexes.reference b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.reference new file mode 100644 index 00000000000..9ece9606f8b --- /dev/null +++ b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.reference @@ -0,0 +1,300 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 
+234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 diff --git a/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql new file mode 100644 index 00000000000..3ff6a5ffbb3 --- /dev/null +++ b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql @@ -0,0 +1,3 @@ +-- Tags: no-fasttest +insert into function file(02293_data.arrow) select toLowCardinality(toString(number)) from numbers(300) settings output_format_arrow_low_cardinality_as_dictionary=1, engine_file_truncate_on_insert=1; +select * from file(02293_data.arrow); diff --git a/utils/changelog/README.md b/utils/changelog/README.md index cd8f8da9b61..8218af83d96 100644 --- a/utils/changelog/README.md +++ b/utils/changelog/README.md @@ -5,17 +5,14 @@ Generate github token: Dependencies: ``` -sudo apt-get install git curl jq python3 python3-fuzzywuzzy -``` - -Update information about tags: -``` -git fetch --tags +sudo apt-get update +sudo apt-get install git python3 python3-fuzzywuzzy python3-github +python3 changelog.py -h ``` Usage example: ``` -export GITHUB_USER=... GITHUB_TOKEN=ghp_... 
-./changelog.sh v21.5.6.6-stable v21.6.2.7-prestable
+python3 changelog.py --output=changelog-v22.4.1.2305-prestable.md --gh-user-or-token="$GITHUB_TOKEN" v21.6.2.7-prestable
+python3 changelog.py --output=changelog-v22.4.1.2305-prestable.md --gh-user-or-token="$USER" --gh-password="$PASSWORD" v21.6.2.7-prestable
 ```
diff --git a/utils/changelog/changelog.py b/utils/changelog/changelog.py
new file mode 100755
index 00000000000..a846c240055
--- /dev/null
+++ b/utils/changelog/changelog.py
@@ -0,0 +1,408 @@
+#!/usr/bin/env python3
+# In our CI this script runs in style-test containers
+"""Generate a Markdown changelog for the pull requests between two git refs."""
+
+import argparse
+import logging
+import re
+from datetime import date, timedelta
+from queue import Empty, Queue
+from subprocess import CalledProcessError, DEVNULL
+from threading import Thread
+from typing import Dict, List, Optional, TextIO
+
+from fuzzywuzzy.fuzz import ratio  # type: ignore
+from github import Github
+from github.NamedUser import NamedUser
+from github.PullRequest import PullRequest
+from github.Repository import Repository
+from git_helper import is_shallow, git_runner as runner
+
+# This array gives the preferred category order, and is also used to
+# normalize category names.
+categories_preferred_order = (
+    "Backward Incompatible Change",
+    "New Feature",
+    "Performance Improvement",
+    "Improvement",
+    "Bug Fix",
+    "Build/Testing/Packaging Improvement",
+    "Other",
+)
+
+# Boundaries of the changelog interval: assigned by check_refs() and only
+# read by the rest of the script.
+FROM_REF = ""
+TO_REF = ""
+
+
+class Description:
+    """One changelog item: the PR, its author, the entry text and the category."""
+
+    def __init__(
+        self, number: int, user: NamedUser, html_url: str, entry: str, category: str
+    ):
+        self.number = number
+        self.html_url = html_url
+        self.user = user
+        self.entry = entry
+        self.category = category
+
+    @property
+    def formatted_entry(self) -> str:
+        """Render the item as a Markdown bullet with PR and author links."""
+        # Substitute issue links.
+        # 1) issue number w/o markdown link
+        entry = re.sub(
+            r"([^[])#([0-9]{4,})",
+            r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)",
+            self.entry,
+        )
+        # 2) issue URL w/o markdown link
+        entry = re.sub(
+            r"([^(])https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})",
+            r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)",
+            entry,
+        )
+        user_name = self.user.name if self.user.name else self.user.login
+        return (
+            f"* {entry} [#{self.number}]({self.html_url}) "
+            f"([{user_name}]({self.user.html_url}))."
+        )
+
+    # Sort PR descriptions by numbers
+    def __eq__(self, other) -> bool:
+        # `isinstance(self, type(other))` had the operands swapped: it let
+        # unrelated objects through and then failed on `other.number`.
+        if not isinstance(other, Description):
+            return NotImplemented
+        return self.number == other.number
+
+    def __lt__(self, other: "Description") -> bool:
+        return self.number < other.number
+
+
+class Worker(Thread):
+    """Thread that turns queued PR numbers into Description objects."""
+
+    def __init__(self, request_queue: Queue, repo: Repository):
+        Thread.__init__(self)
+        self.queue = request_queue
+        self.repo = repo
+        self.response = []  # type: List[Description]
+
+    def run(self):
+        while True:
+            try:
+                # A blocking get() never raises Empty; when another worker
+                # takes the last number first, it would wait forever.
+                number = self.queue.get_nowait()
+            except Empty:
+                break
+            try:
+                desc = self._describe(number)
+                if desc is not None:
+                    self.response.append(desc)
+            finally:
+                # Once per taken number, for the skipped PRs as well
+                self.queue.task_done()
+
+    def _describe(self, number: int) -> Optional[Description]:
+        """Return the description of PR `number`, None if it must be skipped."""
+        api_pr = self.repo.get_pull(number)
+        merge_commit = api_pr.merge_commit_sha
+        try:
+            runner.run(f"git rev-parse '{merge_commit}'")
+        except CalledProcessError:
+            # It's possible that commit not in the repo, just continue
+            logging.info("PR %s does not belong to the repo", api_pr.number)
+            return None
+
+        try:
+            runner.run(
+                f"git merge-base --is-ancestor '{merge_commit}' '{TO_REF}'",
+                stderr=DEVNULL,
+            )
+            runner.run(
+                f"git merge-base --is-ancestor '{FROM_REF}' '{merge_commit}'",
+                stderr=DEVNULL,
+            )
+        except CalledProcessError:
+            # Commit is not between from and to refs
+            return None
+        return generate_description(api_pr, self.repo)
+
+
+def get_descriptions(
+    repo: Repository, numbers: List[int], jobs: int
+) -> Dict[str, List[Description]]:
+    """Describe the PRs `numbers` in `jobs` threads, grouped by category.
+
+    Every list in the returned dict is sorted by PR number.
+    """
+    workers = []  # type: List[Worker]
+    queue = Queue()  # type: Queue
+    for number in numbers:
+        queue.put(number)
+    for _ in range(jobs):
+        worker = Worker(queue, repo)
+        worker.start()
+        workers.append(worker)
+
+    descriptions = {}  # type: Dict[str, List[Description]]
+    for worker in workers:
+        worker.join()
+        for desc in worker.response:
+            descriptions.setdefault(desc.category, []).append(desc)
+
+    for descs in descriptions.values():
+        descs.sort()
+
+    return descriptions
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command line arguments of the script."""
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description="Generate a changelog in MD format between given tags. "
+        "It fetches all tags and unshallows the git repository automatically",
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="count",
+        default=0,
+        help="set the script verbosity, could be used multiple",
+    )
+    parser.add_argument(
+        "--output",
+        type=argparse.FileType("w"),
+        default="-",
+        help="output file for changelog",
+    )
+    parser.add_argument(
+        "--repo",
+        default="ClickHouse/ClickHouse",
+        help="a repository to query for pull-requests from GitHub",
+    )
+    parser.add_argument(
+        "--jobs",
+        type=int,
+        default=10,
+        help="number of jobs to get pull-requests info from GitHub API",
+    )
+    parser.add_argument(
+        "--gh-user-or-token",
+        help="user name or GH token to authenticate",
+    )
+    parser.add_argument(
+        "--gh-password",
+        help="a password that should be used when user is given",
+    )
+    parser.add_argument(
+        "--from",
+        dest="from_ref",
+        help="git ref for a starting point of changelog, by default is calculated "
+        "automatically to match a previous tag in history",
+    )
+    parser.add_argument(
+        "to_ref",
+        metavar="TO_REF",
+        help="git ref for the changelog end",
+    )
+    args = parser.parse_args()
+    return args
+
+
+# This function mirrors the PR description checks in ClickhousePullRequestTrigger.
+# Returns None if the PR should not be mentioned changelog.
+def generate_description(item: PullRequest, repo: Repository) -> Optional[Description]:
+    backport_number = item.number
+    if item.head.ref.startswith("backport/"):
+        branch_parts = item.head.ref.split("/")
+        # The last part has to be the original PR number for int() to work
+        if len(branch_parts) == 3 and branch_parts[-1].isdecimal():
+            item = repo.get_pull(int(branch_parts[-1]))
+        else:
+            logging.warning(
+                "The branch %s doesn't match backport template, using PR %s as is",
+                item.head.ref,
+                item.number,
+            )
+    description = item.body
+    # Don't skip empty lines because they delimit parts of description
+    lines = [x.strip() for x in (description.split("\n") if description else [])]
+    lines = [re.sub(r"\s+", " ", ln) for ln in lines]
+
+    category = ""
+    entry = ""
+
+    i = 0
+    while i < len(lines):
+        if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
+            i += 1
+            if i >= len(lines):
+                break
+            # Can have one empty line between header and the category itself.
+            # Filter it out.
+            if not lines[i]:
+                i += 1
+                if i >= len(lines):
+                    break
+            category = re.sub(r"^[-*\s]*", "", lines[i])
+            i += 1
+        elif re.match(
+            r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
+        ):
+            i += 1
+            # Can have one empty line between header and the entry itself.
+            # Filter it out.
+            if i < len(lines) and not lines[i]:
+                i += 1
+            # All following lines until empty one are the changelog entry.
+            entry_lines = []
+            while i < len(lines) and lines[i]:
+                entry_lines.append(lines[i])
+                i += 1
+            entry = " ".join(entry_lines)
+        else:
+            i += 1
+
+    if not category:
+        # Shouldn't happen, because description check in CI should catch such PRs.
+        # Fall through, so that it shows up in output and the user can fix it.
+        category = "NO CL CATEGORY"
+
+    # Filter out the PR categories that are not for changelog.
+    if re.match(
+        r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
+        category,
+    ):
+        return None
+
+    # Must run before the backport prefix is added: the prefix makes the entry
+    # non-empty, so a backport without an entry would otherwise go unnoticed.
+    if not entry:
+        # Shouldn't happen, because description check in CI should catch such PRs.
+        category = "NO CL ENTRY"
+        entry = "NO CL ENTRY: '" + item.title + "'"
+
+    if backport_number != item.number:
+        entry = f"Backported in #{backport_number}: {entry}"
+
+    entry = entry.strip()
+    if entry[-1] != ".":
+        entry += "."
+
+    for c in categories_preferred_order:
+        if ratio(category.lower(), c.lower()) >= 90:
+            category = c
+            break
+
+    return Description(item.number, item.user, item.html_url, entry, category)
+
+
+def write_changelog(fd: TextIO, descriptions: Dict[str, List[Description]]):
+    """Write the changelog to `fd`: preferred categories first, then the rest."""
+    fd.write(f"### ClickHouse release {TO_REF} FIXME as compared to {FROM_REF}\n\n")
+
+    seen_categories = []  # type: List[str]
+    for category in categories_preferred_order:
+        if category in descriptions:
+            seen_categories.append(category)
+            fd.write(f"#### {category}\n")
+            for desc in descriptions[category]:
+                fd.write(f"{desc.formatted_entry}\n")
+
+            fd.write("\n")
+
+    for category in descriptions:
+        if category not in seen_categories:
+            # NOTE: unlike above, the header is followed by an empty line
+            fd.write(f"#### {category}\n\n")
+            for desc in descriptions[category]:
+                fd.write(f"{desc.formatted_entry}\n")
+
+            fd.write("\n")
+
+
+def check_refs(from_ref: Optional[str], to_ref: str):
+    """Check that both refs exist and save them to FROM_REF and TO_REF.
+
+    Without `from_ref` it is the tag `git describe` finds for the `to_ref` parent.
+    """
+    global FROM_REF, TO_REF
+    TO_REF = to_ref
+
+    # Check TO_REF
+    runner.run(f"git rev-parse {TO_REF}")
+
+    # Check from_ref
+    if from_ref is None:
+        FROM_REF = runner.run(f"git describe --abbrev=0 --tags '{TO_REF}~'")
+        # Check if the previsous tag is different for merge commits
+        # I __assume__ we won't have octopus merges, at least for the tagged commits
+        try:
+            alternative_tag = runner.run(
+                f"git describe --abbrev=0 --tags '{TO_REF}^2'", stderr=DEVNULL
+            )
+            if FROM_REF != alternative_tag:
+                raise Exception(
+                    f"Unable to get unified parent tag for {TO_REF}, "
+                    f"define it manually, get {FROM_REF} and {alternative_tag}"
+                )
+        except CalledProcessError:
+            pass
+    else:
+        # Assign first: FROM_REF is still empty here, so checking it before
+        # the assignment validated nothing.
+        FROM_REF = from_ref
+        runner.run(f"git rev-parse {FROM_REF}")
+
+
+def main():
+    """Prepare the repository, collect the merged PRs and write the changelog."""
+    log_levels = [logging.CRITICAL, logging.WARN, logging.INFO, logging.DEBUG]
+    args = parse_args()
+    logging.basicConfig(
+        format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d]:\n%(message)s",
+        level=log_levels[min(args.verbose, 3)],
+    )
+    # Get the full repo
+    if is_shallow():
+        logging.info("Unshallow repository")
+        runner.run("git fetch --unshallow", stderr=DEVNULL)
+    logging.info("Fetching all tags")
+    runner.run("git fetch --tags", stderr=DEVNULL)
+
+    check_refs(args.from_ref, args.to_ref)
+
+    logging.info("Using %s..%s as changelog interval", FROM_REF, TO_REF)
+
+    # Get starting and ending dates for gathering PRs
+    # Add one day after and before to mitigate TZ possible issues
+    # `tag^{}` format gives commit ref when we have annotated tags
+    from_date = runner.run(f"git log -1 --format=format:%as '{FROM_REF}^{{}}'")
+    from_date = (date.fromisoformat(from_date) - timedelta(1)).isoformat()
+    to_date = runner.run(f"git log -1 --format=format:%as '{TO_REF}^{{}}'")
+    to_date = (date.fromisoformat(to_date) + timedelta(1)).isoformat()
+
+    # Get all PRs for the given time frame
+    gh = Github(
+        args.gh_user_or_token, args.gh_password, per_page=100, pool_size=args.jobs
+    )
+    query = f"type:pr repo:{args.repo} is:merged merged:{from_date}..{to_date}"
+    repo = gh.get_repo(args.repo)
+    api_prs = gh.search_issues(query=query, sort="created")
+    logging.info("Found %s PRs for the query: '%s'", api_prs.totalCount, query)
+
+    pr_numbers = [pr.number for pr in api_prs]
+
+    descriptions = get_descriptions(repo, pr_numbers, args.jobs)
+
+    write_changelog(args.output, descriptions)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/utils/changelog/changelog.sh b/utils/changelog/changelog.sh
deleted file mode 100755
index 52817acfae4..00000000000
--- a/utils/changelog/changelog.sh
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/bin/bash
-set -e
-
-script_dir="$( cd
"$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -from="$1" -to="$2" -log_command=(git log "$from..$to" --first-parent) - -"${log_command[@]}" > "changelog-log.txt" - -# Check for diamond merges. -if "${log_command[@]}" --oneline --grep "Merge branch '" | grep '' -then - # DO NOT ADD automated handling of diamond merges to this script. - # It is an unsustainable way to work with git, and it MUST be visible. - echo Warning: suspected diamond merges above. - echo Some commits will be missed, review these manually. -fi - -# Search for PR numbers in commit messages. First variant is normal merge, and second -# variant is squashed. Next are some backport message variants. -find_prs=(sed -n "s/^.*merg[eding]*.*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*(#\([[:digit:]]\+\))$/\1/p; - s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip") - -# awk is to filter out small task numbers from different task tracker, which are -# referenced by documentation commits like '* DOCSUP-824: query log (#115)'. -"${find_prs[@]}" "changelog-log.txt" | sort -rn | uniq | awk '$0 > 1000 { print $0 }' > "changelog-prs.txt" - -echo "$(wc -l < "changelog-prs.txt") PRs added between $from and $to." -if [ $(wc -l < "changelog-prs.txt") -eq 0 ] ; then exit 0 ; fi - -function github_download() -{ - local url=${1} - local file=${2} - if ! [ -f "$file" ] - then - echo "curl -u \"$GITHUB_USER:***\" -sSf \"$url\" > \"$file\"" - - if ! curl -u "$GITHUB_USER:$GITHUB_TOKEN" \ - -sSf "$url" \ - > "$file" - then - >&2 echo "Failed to download '$url' to '$file'. Contents: '$(cat "$file")'." - rm "$file" - return 1 - fi - sleep 0.1 - fi -} - -rm changelog-prs-filtered.txt &> /dev/null ||: -for pr in $(cat "changelog-prs.txt") -do - # Download PR info from github. - file="pr$pr.json" - github_download "https://api.github.com/repos/ClickHouse/ClickHouse/pulls/$pr" "$file" || continue - - if ! 
[ "$pr" == "$(jq -r .number "$file")" ] - then - >&2 echo "Got wrong data for PR #$pr (please check and remove '$file')." - continue - fi - - # Filter out PRs by bots. - user_login=$(jq -r .user.login "$file") - - filter_bot=$(echo "$user_login" | grep -q "\[bot\]$" && echo "Skip." || echo "Ok." ||:) - filter_robot=$(echo "$user_login" | grep -q "robot-clickhouse" && echo "Skip." || echo "Ok." ||:) - - if [ "Skip." == "$filter_robot" ] || [ "Skip." == "$filter_bot" ] - then - continue - fi - - # Download author info from github. - user_id=$(jq -r .user.id "$file") - user_file="user$user_id.json" - github_download "$(jq -r .user.url "$file")" "$user_file" || continue - - if ! [ "$user_id" == "$(jq -r .id "$user_file")" ] - then - >&2 echo "Got wrong data for user #$user_id (please check and remove '$user_file')." - continue - fi - - echo "$pr" >> changelog-prs-filtered.txt -done - -echo "### ClickHouse release $to FIXME as compared to $from -" > changelog.md -"$script_dir/format-changelog.py" changelog-prs-filtered.txt >> changelog.md -cat changelog.md diff --git a/utils/changelog/format-changelog.py b/utils/changelog/format-changelog.py deleted file mode 100755 index ef1340d48dd..00000000000 --- a/utils/changelog/format-changelog.py +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/python3 - -import argparse -import collections -import fuzzywuzzy.fuzz -import itertools -import json -import os -import re -import sys - -parser = argparse.ArgumentParser(description="Format changelog for given PRs.") -parser.add_argument( - "file", - metavar="FILE", - type=argparse.FileType("r", encoding="utf-8"), - nargs="?", - default=sys.stdin, - help="File with PR numbers, one per line.", -) -args = parser.parse_args() - -# This function mirrors the PR description checks in ClickhousePullRequestTrigger. -# Returns False if the PR should not be mentioned changelog. 
-def parse_one_pull_request(item): - description = item["body"] - # Don't skip empty lines because they delimit parts of description - lines = [ - line - for line in [ - x.strip() for x in (description.split("\n") if description else []) - ] - ] - lines = [re.sub(r"\s+", " ", l) for l in lines] - - category = "" - entry = "" - - if lines: - i = 0 - while i < len(lines): - if re.match(r"(?i)^[>*_ ]*change\s*log\s*category", lines[i]): - i += 1 - if i >= len(lines): - break - # Can have one empty line between header and the category itself. Filter it out. - if not lines[i]: - i += 1 - if i >= len(lines): - break - category = re.sub(r"^[-*\s]*", "", lines[i]) - i += 1 - elif re.match( - r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] - ): - i += 1 - # Can have one empty line between header and the entry itself. Filter it out. - if i < len(lines) and not lines[i]: - i += 1 - # All following lines until empty one are the changelog entry. - entry_lines = [] - while i < len(lines) and lines[i]: - entry_lines.append(lines[i]) - i += 1 - entry = " ".join(entry_lines) - else: - i += 1 - - if not category: - # Shouldn't happen, because description check in CI should catch such PRs. - # Fall through, so that it shows up in output and the user can fix it. - category = "NO CL CATEGORY" - - # Filter out the PR categories that are not for changelog. - if re.match( - r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", - category, - ): - return False - - if not entry: - # Shouldn't happen, because description check in CI should catch such PRs. - category = "NO CL ENTRY" - entry = "NO CL ENTRY: '" + item["title"] + "'" - - entry = entry.strip() - if entry[-1] != ".": - entry += "." - - item["entry"] = entry - item["category"] = category - - return True - - -# This array gives the preferred category order, and is also used to -# normalize category names. 
-categories_preferred_order = [ - "Backward Incompatible Change", - "New Feature", - "Performance Improvement", - "Improvement", - "Bug Fix", - "Build/Testing/Packaging Improvement", - "Other", -] - -category_to_pr = collections.defaultdict(lambda: []) -users = {} -for line in args.file: - pr = json.loads(open(f"pr{line.strip()}.json").read()) - assert pr["number"] - if not parse_one_pull_request(pr): - continue - - assert pr["category"] - - # Normalize category name - for c in categories_preferred_order: - if fuzzywuzzy.fuzz.ratio(pr["category"].lower(), c.lower()) >= 90: - pr["category"] = c - break - - category_to_pr[pr["category"]].append(pr) - user_id = pr["user"]["id"] - users[user_id] = json.loads(open(f"user{user_id}.json").read()) - - -def print_category(category): - print(("#### " + category)) - print() - for pr in category_to_pr[category]: - user = users[pr["user"]["id"]] - user_name = user["name"] if user["name"] else user["login"] - - # Substitute issue links. - # 1) issue number w/o markdown link - pr["entry"] = re.sub( - r"([^[])#([0-9]{4,})", - r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", - pr["entry"], - ) - # 2) issue URL w/o markdown link - pr["entry"] = re.sub( - r"([^(])https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})", - r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", - pr["entry"], - ) - - print( - f'* {pr["entry"]} [#{pr["number"]}]({pr["html_url"]}) ([{user_name}]({user["html_url"]})).' 
- ) - - print() - - -# Print categories in preferred order -for category in categories_preferred_order: - if category in category_to_pr: - print_category(category) - category_to_pr.pop(category) - -# Print the rest of the categories -for category in category_to_pr: - print_category(category) diff --git a/utils/changelog/git_helper.py b/utils/changelog/git_helper.py new file mode 120000 index 00000000000..03b05a7eddd --- /dev/null +++ b/utils/changelog/git_helper.py @@ -0,0 +1 @@ +../../tests/ci/git_helper.py \ No newline at end of file diff --git a/utils/changelog/requirements.txt b/utils/changelog/requirements.txt new file mode 100644 index 00000000000..106e9e2c72d --- /dev/null +++ b/utils/changelog/requirements.txt @@ -0,0 +1,3 @@ +fuzzywuzzy +PyGitHub +python-Levenshtein