diff --git a/.clang-tidy b/.clang-tidy index 0400b500e5c..6fd67876923 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -16,7 +16,6 @@ Checks: '-*, modernize-make-unique, modernize-raw-string-literal, modernize-redundant-void-arg, - modernize-replace-auto-ptr, modernize-replace-random-shuffle, modernize-use-bool-literals, modernize-use-nullptr, @@ -145,6 +144,7 @@ Checks: '-*, clang-analyzer-cplusplus.SelfAssignment, clang-analyzer-deadcode.DeadStores, clang-analyzer-cplusplus.Move, + clang-analyzer-optin.cplusplus.UninitializedObject, clang-analyzer-optin.cplusplus.VirtualCall, clang-analyzer-security.insecureAPI.UncheckedReturn, clang-analyzer-security.insecureAPI.bcmp, @@ -164,6 +164,8 @@ Checks: '-*, clang-analyzer-unix.cstring.NullArg, boost-use-to-string, + + alpha.security.cert.env.InvalidPtr, ' WarningsAsErrors: '*' @@ -210,3 +212,6 @@ CheckOptions: value: false - key: performance-move-const-arg.CheckTriviallyCopyableMove value: false + # Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097 + - key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp + value: expr-type diff --git a/.gitattributes b/.gitattributes index bcc7d57b904..a23f027122b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,3 @@ contrib/* linguist-vendored *.h linguist-language=C++ +tests/queries/0_stateless/data_json/* binary diff --git a/.github/ISSUE_TEMPLATE/10_question.md b/.github/ISSUE_TEMPLATE/10_question.md index a112b9599d5..5b3d00a3180 100644 --- a/.github/ISSUE_TEMPLATE/10_question.md +++ b/.github/ISSUE_TEMPLATE/10_question.md @@ -7,6 +7,6 @@ assignees: '' --- -> Make sure to check documentation https://clickhouse.yandex/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse +> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse > If you still prefer GitHub issues, remove all this text and ask your question here. diff --git a/.github/ISSUE_TEMPLATE/50_build-issue.md b/.github/ISSUE_TEMPLATE/50_build-issue.md index a358575cd7c..9b05fbbdd13 100644 --- a/.github/ISSUE_TEMPLATE/50_build-issue.md +++ b/.github/ISSUE_TEMPLATE/50_build-issue.md @@ -7,7 +7,7 @@ assignees: '' --- -> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.yandex/docs/en/development/build/ +> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. 
Just in case, official build instructions are published here: https://clickhouse.com/docs/en/development/build/ **Operating system** diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 6540b60476f..2d8540b57ea 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,4 @@ -Changelog category (leave one): +### Changelog category (leave one): - New Feature - Improvement - Bug Fix (user-visible misbehaviour in official stable or prestable release) @@ -9,7 +9,7 @@ Changelog category (leave one): - Not for changelog (changelog entry is not required) -Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md): +### Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md): ... diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 75f8a63368d..44fe082b04d 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -9,6 +9,18 @@ on: # yamllint disable-line rule:truthy branches: - 'backport/**' jobs: + PythonUnitTests: + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Python unit tests + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 -m unittest discover -s . -p '*_test.py' DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: @@ -143,8 +155,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -184,8 +196,8 @@ jobs: - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -229,8 +241,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -274,8 +286,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -319,8 +331,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -341,10 +353,15 @@ jobs: steps: - name: Set envs run: | + DEPENDENCIES=$(cat << 'EOF' | jq '. 
| length' + ${{ toJSON(needs) }} + EOF + ) + echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/report_check - REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=ClickHouse build check (actions) + REPORTS_PATH=${{runner.temp}}/reports_dir + TEMP_PATH=${{runner.temp}}/report_check EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -360,7 +377,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" + python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" - name: Cleanup if: always() run: | diff --git a/.github/workflows/debug.yml b/.github/workflows/debug.yml index 7bb5ac65140..fa980a95a39 100644 --- a/.github/workflows/debug.yml +++ b/.github/workflows/debug.yml @@ -2,7 +2,7 @@ name: Debug 'on': - [push, pull_request, release] + [push, pull_request, release, workflow_dispatch] jobs: DebugInfo: diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 5816a58081d..efaf1c64c05 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -149,7 +149,6 @@ jobs: sudo rm -fr "$TEMP_PATH" SplitBuildSmokeTest: needs: [BuilderDebSplitted] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, style-checker] steps: - name: Set envs @@ -220,8 +219,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -261,8 +260,8 @@ jobs: - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -306,8 +305,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -316,7 +315,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRelease: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -352,8 +350,53 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinGCC: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + CHECK_NAME=ClickHouse build check (actions) + BUILD_NAME=binary_gcc + 
EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'true' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -397,8 +440,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -442,8 +485,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -487,8 +530,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -532,8 +575,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -577,8 +620,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -590,7 +633,6 @@ jobs: ########################################################################################## BuilderDebSplitted: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -626,8 +668,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -636,7 +678,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinTidy: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -672,8 +713,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: 
${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -682,7 +723,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinDarwin: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -718,8 +758,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -728,7 +768,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinAarch64: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -764,8 +803,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -774,7 +813,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinFreeBSD: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -810,8 +848,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -820,7 +858,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinDarwinAarch64: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -856,8 +893,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -866,7 +903,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinPPC64: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -902,8 +938,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -911,6 +947,34 @@ jobs: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ +##################################### Docker images ####################################### 
+############################################################################################ + DockerServerImages: + needs: + - BuilderDebRelease + - BuilderDebAarch64 + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no version info + - name: Check docker clickhouse/clickhouse-server building + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_server.py --release-type head + python3 docker_server.py --release-type head --no-ubuntu \ + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" +############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: @@ -918,6 +982,7 @@ jobs: - BuilderDebRelease - BuilderDebAarch64 - BuilderBinRelease + - BuilderBinGCC - BuilderDebAsan - BuilderDebTsan - BuilderDebUBsan @@ -927,10 +992,16 @@ jobs: steps: - name: Set envs run: | + DEPENDENCIES=$(cat << 'EOF' | jq '. | length' + ${{ toJSON(needs) }} + EOF + ) + echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/report_check - REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=ClickHouse build check (actions) + REPORTS_PATH=${{runner.temp}}/reports_dir + REPORTS_PATH=${{runner.temp}}/reports_dir + TEMP_PATH=${{runner.temp}}/report_check EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -946,7 +1017,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" + python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" - name: Cleanup if: always() run: | @@ -2608,6 +2679,40 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" + UnitTestsReleaseGCC: + needs: [BuilderBinGCC] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/unit_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Unit tests (release-gcc, actions) + REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Unit test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 unit_tests_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index a172947b2fc..bd54fd975c0 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -7,6 +7,7 @@ env: "on": schedule: - cron: '13 3 * * *' + 
workflow_dispatch: jobs: DockerHubPushAarch64: @@ -71,3 +72,53 @@ jobs: with: name: changed_images path: ${{ runner.temp }}/changed_images.json + BuilderCoverity: + needs: DockerHubPush + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + BUILD_NAME=coverity + CACHES_PATH=${{runner.temp}}/../ccaches + CHECK_NAME=ClickHouse build check (actions) + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + TEMP_PATH=${{runner.temp}}/build_check + EOF + echo "COVERITY_TOKEN=${{ secrets.COVERITY_TOKEN }}" >> "$GITHUB_ENV" + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + id: coverity-checkout + uses: actions/checkout@v2 + with: + submodules: 'true' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + - name: Upload Coverity Analysis + if: ${{ success() || failure() }} + run: | + curl --form token="${COVERITY_TOKEN}" \ + --form email='security+coverity@clickhouse.com' \ + --form file="@$TEMP_PATH/$BUILD_NAME/coverity-scan.tgz" \ + --form version="${GITHUB_REF#refs/heads/}-${GITHUB_SHA::6}" \ + --form description="Nighly Scan: $(date +'%Y-%m-%dT%H:%M:%S')" \ + https://scan.coverity.com/builds?project=ClickHouse%2FClickHouse + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 4a3880543c4..0e68a68e8f5 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -4,7 +4,7 @@ env: # Force the stdout and stderr streams to be unbuffered PYTHONUNBUFFERED: 1 -on: # yamllint disable-line rule:truthy +on: # yamllint disable-line rule:truthy pull_request: types: - synchronize @@ -153,13 +153,19 @@ jobs: EOF - name: Clear repository run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Fast Test - run: | + sudo rm -fr "$GITHUB_WORKSPACE" + mkdir "$GITHUB_WORKSPACE" sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.TEMP_PATH }} + - name: Fast Test + run: | cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 fast_test_check.py - name: Cleanup @@ -272,8 +278,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -317,8 +323,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -362,8 +368,50 @@ jobs: if: ${{ 
success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinGCC: + needs: [DockerHubPush, FastTest] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + CHECK_NAME=ClickHouse build check (actions) + BUILD_NAME=binary_gcc + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'true' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -404,8 +452,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -449,8 +497,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -494,8 +542,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -539,8 +587,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -584,8 +632,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -629,8 +677,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -677,8 +725,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - 
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -722,8 +770,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -767,8 +815,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -812,8 +860,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -857,8 +905,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -902,8 +950,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -947,8 +995,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -956,6 +1004,34 @@ jobs: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ +##################################### Docker images ####################################### +############################################################################################ + DockerServerImages: + needs: + - BuilderDebRelease + - BuilderDebAarch64 + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no version info + - name: Check docker clickhouse/clickhouse-server building + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_server.py --release-type head --no-push + python3 docker_server.py --release-type head --no-push --no-ubuntu \ + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" +############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: @@ -963,6 +1039,7 @@ jobs: - BuilderDebRelease - 
BuilderDebAarch64 - BuilderBinRelease + - BuilderBinGCC - BuilderDebAsan - BuilderDebTsan - BuilderDebUBsan @@ -973,10 +1050,15 @@ jobs: steps: - name: Set envs run: | + DEPENDENCIES=$(cat << 'EOF' | jq '. | length' + ${{ toJSON(needs) }} + EOF + ) + echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/report_check - REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=ClickHouse build check (actions) + REPORTS_PATH=${{runner.temp}}/reports_dir + TEMP_PATH=${{runner.temp}}/report_check EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -992,7 +1074,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" + python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" - name: Cleanup if: always() run: | @@ -1733,6 +1815,51 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" + TestsBugfixCheck: + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/tests_bugfix_check + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Tests bugfix validate check (actions) + KILL_TIMEOUT=3600 + REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Bugfix test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + + TEMP_PATH="${TEMP_PATH}/integration" \ + REPORTS_PATH="${REPORTS_PATH}/integration" \ + python3 integration_test_check.py "Integration tests bugfix validate check" \ + --validate-bugfix --post-commit-status=file || echo 'ignore exit code' + + TEMP_PATH="${TEMP_PATH}/stateless" \ + REPORTS_PATH="${REPORTS_PATH}/stateless" \ + python3 functional_test_check.py "Stateless tests bugfix validate check" "$KILL_TIMEOUT" \ + --validate-bugfix --post-commit-status=file || echo 'ignore exit code' + + python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/post_commit_status.tsv" "${TEMP_PATH}/integration/post_commit_status.tsv" + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ############################ FUNCTIONAl STATEFUL TESTS ####################################### ############################################################################################## @@ -2763,6 +2890,40 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" + UnitTestsReleaseGCC: + needs: [BuilderBinGCC] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/unit_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Unit tests (release-gcc, actions) + REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out 
repository code + uses: actions/checkout@v2 + - name: Unit test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 unit_tests_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -3016,6 +3177,7 @@ jobs: needs: - StyleCheck - DockerHubPush + - DockerServerImages - CheckLabels - BuilderReport - FastTest diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0fe025080dd..ea2e1ed33fb 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -32,7 +32,32 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: ${{runner.temp}}/release_packages/* + file: ${{runner.temp}}/push_to_artifactory/* overwrite: true tag: ${{ github.ref }} file_glob: true + ############################################################################################ + ##################################### Docker images ####################################### + ############################################################################################ + DockerServerImages: + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no version info + - name: Check docker clickhouse/clickhouse-server building + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_server.py --release-type auto --version "${{ github.ref }}" + python3 docker_server.py --release-type auto --version "${{ github.ref }}" --no-ubuntu \ + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index d916699acc2..91e1a224204 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -146,8 +146,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -187,8 +187,8 @@ jobs: - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -232,8 +232,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -277,8 +277,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: 
Cleanup if: always() run: | @@ -322,8 +322,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -367,8 +367,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -412,8 +412,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -436,10 +436,16 @@ jobs: steps: - name: Set envs run: | + DEPENDENCIES=$(cat << 'EOF' | jq '. | length' + ${{ toJSON(needs) }} + EOF + ) + echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/report_check - REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=ClickHouse build check (actions) + REPORTS_PATH=${{runner.temp}}/reports_dir + REPORTS_PATH=${{runner.temp}}/reports_dir + TEMP_PATH=${{runner.temp}}/report_check EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -455,7 +461,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" + python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" - name: Cleanup if: always() run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bbadb2115d..100b03ab92b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,139 @@ -### ClickHouse release v22.2, 2022-02-17 +### Table of Contents +**[ClickHouse release v22.3-lts, 2022-03-17](#223)**
+**[ClickHouse release v22.2, 2022-02-17](#222)**
+**[ClickHouse release v22.1, 2022-01-18](#221)**
+**[Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md)**
+ + +## ClickHouse release v22.3-lts, 2022-03-17 + +#### Backward Incompatible Change + +* Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)). +* Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)). +* Make function `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave same as `toIPv4`, `toIPv6` functions. Changed behavior of incorrect IP address passed into functions `toIPv4`,` toIPv6`, now if invalid IP address passes into this functions exception will be raised, before this function return default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringOrNull` `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault `, `toIPv4OrDefault `, `toIPv6OrDefault ` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning default value for invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`, if this setting enabled, then IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)). + +#### New Feature + +* Support for caching data locally for remote filesystems. It can be enabled for `s3` disks. Closes [#28961](https://github.com/ClickHouse/ClickHouse/issues/28961). [#33717](https://github.com/ClickHouse/ClickHouse/pull/33717) ([Kseniia Sumarokova](https://github.com/kssenii)). In the meantime, we enabled the test suite on s3 filesystem and no more known issues exist, so it is started to be production ready. +* Add new table function `hive`. It can be used as follows `hive('', '', '', '', '')` for example `SELECT * FROM hive('thrift://hivetest:9083', 'test', 'demo', 'id Nullable(String), score Nullable(Int32), day Nullable(String)', 'day')`. [#34946](https://github.com/ClickHouse/ClickHouse/pull/34946) ([lgbo](https://github.com/lgbo-ustc)). +* Support authentication of users connected via SSL by their X.509 certificate. [#31484](https://github.com/ClickHouse/ClickHouse/pull/31484) ([eungenue](https://github.com/eungenue)). +* Support schema inference for inserting into table functions `file`/`hdfs`/`s3`/`url`. [#34732](https://github.com/ClickHouse/ClickHouse/pull/34732) ([Kruglov Pavel](https://github.com/Avogar)). +* Now you can read `system.zookeeper` table without restrictions on path or using `like` expression. 
These reads can generate quite a heavy load for ZooKeeper, so to enable this ability you have to enable the setting `allow_unrestricted_reads_from_keeper`. [#34609](https://github.com/ClickHouse/ClickHouse/pull/34609) ([Sergei Trifonov](https://github.com/serxa)). +* Display CPU and memory metrics in clickhouse-local. Close [#34545](https://github.com/ClickHouse/ClickHouse/issues/34545). [#34605](https://github.com/ClickHouse/ClickHouse/pull/34605) ([李扬](https://github.com/taiyang-li)). +* Implement `startsWith` and `endsWith` functions for arrays, closes [#33982](https://github.com/ClickHouse/ClickHouse/issues/33982). [#34368](https://github.com/ClickHouse/ClickHouse/pull/34368) ([usurai](https://github.com/usurai)). +* Add three functions for the Map data type: 1. `mapReplace(map1, map2)` - replaces values for keys in map1 with the values of the corresponding keys in map2; adds keys from map2 that don't exist in map1. 2. `mapFilter` 3. `mapMap`. mapFilter and mapMap are higher-order functions accepting two arguments: the first argument is a lambda function that takes a (k, v) pair, and the second argument is a column of type Map. [#33698](https://github.com/ClickHouse/ClickHouse/pull/33698) ([hexiaoting](https://github.com/hexiaoting)). +* Allow getting the default user and password for clickhouse-client from the `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables. Close [#34538](https://github.com/ClickHouse/ClickHouse/issues/34538). [#34947](https://github.com/ClickHouse/ClickHouse/pull/34947) ([DR](https://github.com/freedomDR)). + +#### Experimental Feature + +* New data type `Object()`, which supports storing semi-structured data (for now JSON only). Data is written to such types as a string. Then all paths are extracted according to the format of the semi-structured data and written as separate columns in the most optimal types that can store all their values. Those columns can be queried by names that match paths in the source data, e.g. `data.key1.key2`, or with the cast operator `data.key1.key2::Int64`. +* Add `database_replicated_allow_only_replicated_engine` setting. When enabled, it is only allowed to create `Replicated` tables or tables with stateless engines in `Replicated` databases. [#35214](https://github.com/ClickHouse/ClickHouse/pull/35214) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). Note that `Replicated` database is still an experimental feature. + +#### Performance Improvement + +* Improve performance of insertion into `MergeTree` tables by optimizing sorting. Up to 2x improvement is observed on realistic benchmarks. [#34750](https://github.com/ClickHouse/ClickHouse/pull/34750) ([Maksim Kita](https://github.com/kitaisreal)). +* Column pruning when reading Parquet, ORC and Arrow files from URL and S3. Closes [#34163](https://github.com/ClickHouse/ClickHouse/issues/34163). [#34849](https://github.com/ClickHouse/ClickHouse/pull/34849) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Column pruning when reading Parquet, ORC and Arrow files from Hive. [#34954](https://github.com/ClickHouse/ClickHouse/pull/34954) ([lgbo](https://github.com/lgbo-ustc)). +* A bunch of performance optimizations from a performance superhero. Improve performance of processing queries with a large `IN` section. Improve performance of the `direct` dictionary if its source is `ClickHouse`. Improve performance of the `detectCharset` and `detectLanguageUnknown` functions. [#34888](https://github.com/ClickHouse/ClickHouse/pull/34888) ([Maksim Kita](https://github.com/kitaisreal)).
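The `startsWith`/`endsWith` entry above extends the existing string functions to arrays. A minimal sketch of how the array overloads can be used; the literal values are illustrative only:

```sql
-- Prefix/suffix checks on arrays, returning UInt8 (1 = match, 0 = no match).
SELECT
    startsWith([1, 2, 3, 4], [1, 2]) AS is_prefix,
    endsWith([1, 2, 3, 4], [3, 4])   AS is_suffix;
```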
+* Improve performance of `any` aggregate function by using more batching. [#34760](https://github.com/ClickHouse/ClickHouse/pull/34760) ([Raúl Marín](https://github.com/Algunenano)). +* Multiple improvements for performance of `clickhouse-keeper`: less locking [#35010](https://github.com/ClickHouse/ClickHouse/pull/35010) ([zhanglistar](https://github.com/zhanglistar)), lower memory usage by streaming reading and writing of snapshot instead of full copy. [#34584](https://github.com/ClickHouse/ClickHouse/pull/34584) ([zhanglistar](https://github.com/zhanglistar)), optimizing compaction of log store in the RAFT implementation. [#34534](https://github.com/ClickHouse/ClickHouse/pull/34534) ([zhanglistar](https://github.com/zhanglistar)), versioning of the internal data structure [#34486](https://github.com/ClickHouse/ClickHouse/pull/34486) ([zhanglistar](https://github.com/zhanglistar)). + +#### Improvement + +* Allow asynchronous inserts to table functions. Fixes [#34864](https://github.com/ClickHouse/ClickHouse/issues/34864). [#34866](https://github.com/ClickHouse/ClickHouse/pull/34866) ([Anton Popov](https://github.com/CurtizJ)). +* Implicit type casting of the key argument for functions `dictGetHierarchy`, `dictIsIn`, `dictGetChildren`, `dictGetDescendants`. Closes [#34970](https://github.com/ClickHouse/ClickHouse/issues/34970). [#35027](https://github.com/ClickHouse/ClickHouse/pull/35027) ([Maksim Kita](https://github.com/kitaisreal)). +* `EXPLAIN AST` query can output AST in form of a graph in Graphviz format: `EXPLAIN AST graph = 1 SELECT * FROM system.parts`. [#35173](https://github.com/ClickHouse/ClickHouse/pull/35173) ([李扬](https://github.com/taiyang-li)). +* When large files were written with `s3` table function or table engine, the content type on the files was mistakenly set to `application/xml` due to a bug in the AWS SDK. This closes [#33964](https://github.com/ClickHouse/ClickHouse/issues/33964). [#34433](https://github.com/ClickHouse/ClickHouse/pull/34433) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Change restrictive row policies a bit to make them an easier alternative to permissive policies in easy cases. If for a particular table only restrictive policies exist (without permissive policies) users will be able to see some rows. Also `SHOW CREATE ROW POLICY` will always show `AS permissive` or `AS restrictive` in row policy's definition. [#34596](https://github.com/ClickHouse/ClickHouse/pull/34596) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improve schema inference with globs in File/S3/HDFS/URL engines. Try to use the next path for schema inference in case of error. [#34465](https://github.com/ClickHouse/ClickHouse/pull/34465) ([Kruglov Pavel](https://github.com/Avogar)). +* Play UI now correctly detects the preferred light/dark theme from the OS. [#35068](https://github.com/ClickHouse/ClickHouse/pull/35068) ([peledni](https://github.com/peledni)). +* Added `date_time_input_format = 'best_effort_us'`. Closes [#34799](https://github.com/ClickHouse/ClickHouse/issues/34799). [#34982](https://github.com/ClickHouse/ClickHouse/pull/34982) ([WenYao](https://github.com/Cai-Yao)). +* A new settings called `allow_plaintext_password` and `allow_no_password` are added in server configuration which turn on/off authentication types that can be potentially insecure in some environments. They are allowed by default. [#34738](https://github.com/ClickHouse/ClickHouse/pull/34738) ([Heena Bansal](https://github.com/HeenaBansal2009)). 
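The `date_time_input_format = 'best_effort_us'` entry above adds US-style (month-first) date parsing for text input formats. A minimal sketch, assuming a hypothetical `events` table created only for illustration:

```sql
-- Hypothetical table used only to demonstrate the setting.
CREATE TABLE events (ts DateTime) ENGINE = Memory;

SET date_time_input_format = 'best_effort_us';

-- With best_effort_us, '02/03/2022' is read as February 3rd (month-first),
-- not March 2nd.
INSERT INTO events FORMAT CSV
"02/03/2022 10:15:00"
```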
+* Support for `DateTime64` data type in `Arrow` format, closes [#8280](https://github.com/ClickHouse/ClickHouse/issues/8280) and closes [#28574](https://github.com/ClickHouse/ClickHouse/issues/28574). [#34561](https://github.com/ClickHouse/ClickHouse/pull/34561) ([李扬](https://github.com/taiyang-li)). +* Reload `remote_url_allow_hosts` (filtering of outgoing connections) on config update. [#35294](https://github.com/ClickHouse/ClickHouse/pull/35294) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Support `--testmode` parameter for `clickhouse-local`. This parameter enables interpretation of test hints that we use in functional tests. [#35264](https://github.com/ClickHouse/ClickHouse/pull/35264) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add `distributed_depth` to query log. It is like a more detailed variant of `is_initial_query` [#35207](https://github.com/ClickHouse/ClickHouse/pull/35207) ([李扬](https://github.com/taiyang-li)). +* Respect `remote_url_allow_hosts` for `MySQL` and `PostgreSQL` table functions. [#35191](https://github.com/ClickHouse/ClickHouse/pull/35191) ([Heena Bansal](https://github.com/HeenaBansal2009)). +* Added `disk_name` field to `system.part_log`. [#35178](https://github.com/ClickHouse/ClickHouse/pull/35178) ([Artyom Yurkov](https://github.com/Varinara)). +* Do not retry non-rertiable errors when querying remote URLs. Closes [#35161](https://github.com/ClickHouse/ClickHouse/issues/35161). [#35172](https://github.com/ClickHouse/ClickHouse/pull/35172) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support distributed INSERT SELECT queries (the setting `parallel_distributed_insert_select`) table function `view()`. [#35132](https://github.com/ClickHouse/ClickHouse/pull/35132) ([Azat Khuzhin](https://github.com/azat)). +* More precise memory tracking during `INSERT` into `Buffer` with `AggregateFunction`. [#35072](https://github.com/ClickHouse/ClickHouse/pull/35072) ([Azat Khuzhin](https://github.com/azat)). +* Avoid division by zero in Query Profiler if Linux kernel has a bug. Closes [#34787](https://github.com/ClickHouse/ClickHouse/issues/34787). [#35032](https://github.com/ClickHouse/ClickHouse/pull/35032) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add more sanity checks for keeper configuration: now mixing of localhost and non-local servers is not allowed, also add checks for same value of internal raft port and keeper client port. [#35004](https://github.com/ClickHouse/ClickHouse/pull/35004) ([alesapin](https://github.com/alesapin)). +* Currently, if the user changes the settings of the system tables there will be tons of logs and ClickHouse will rename the tables every minute. This fixes [#34929](https://github.com/ClickHouse/ClickHouse/issues/34929). [#34949](https://github.com/ClickHouse/ClickHouse/pull/34949) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Use connection pool for Hive metastore client. [#34940](https://github.com/ClickHouse/ClickHouse/pull/34940) ([lgbo](https://github.com/lgbo-ustc)). +* Ignore per-column `TTL` in `CREATE TABLE AS` if new table engine does not support it (i.e. if the engine is not of `MergeTree` family). [#34938](https://github.com/ClickHouse/ClickHouse/pull/34938) ([Azat Khuzhin](https://github.com/azat)). +* Allow `LowCardinality` strings for `ngrambf_v1`/`tokenbf_v1` indexes. Closes [#21865](https://github.com/ClickHouse/ClickHouse/issues/21865). [#34911](https://github.com/ClickHouse/ClickHouse/pull/34911) ([Lars Hiller Eidnes](https://github.com/larspars)). 
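The `ngrambf_v1`/`tokenbf_v1` entry above lifts the previous restriction on `LowCardinality` string columns. A minimal sketch of a table definition that now works; the table name and index parameters (bloom filter size, number of hash functions, seed) are illustrative:

```sql
-- Hypothetical table: a token bloom-filter skipping index on a LowCardinality column.
CREATE TABLE logs
(
    message LowCardinality(String),
    INDEX message_tokens message TYPE tokenbf_v1(512, 3, 0) GRANULARITY 4
)
ENGINE = MergeTree
ORDER BY tuple();
```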
+* Allow opening empty sqlite db if the file doesn't exist. Closes [#33367](https://github.com/ClickHouse/ClickHouse/issues/33367). [#34907](https://github.com/ClickHouse/ClickHouse/pull/34907) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Implement memory statistics for FreeBSD - this is required for `max_server_memory_usage` to work correctly. [#34902](https://github.com/ClickHouse/ClickHouse/pull/34902) ([Alexandre Snarskii](https://github.com/snar)). +* In previous versions the progress bar in clickhouse-client can jump forward near 50% for no reason. This closes [#34324](https://github.com/ClickHouse/ClickHouse/issues/34324). [#34801](https://github.com/ClickHouse/ClickHouse/pull/34801) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now `ALTER TABLE DROP COLUMN columnX` queries for `MergeTree` table engines will work instantly when `columnX` is an `ALIAS` column. Fixes [#34660](https://github.com/ClickHouse/ClickHouse/issues/34660). [#34786](https://github.com/ClickHouse/ClickHouse/pull/34786) ([alesapin](https://github.com/alesapin)). +* Show hints when user mistyped the name of a data skipping index. Closes [#29698](https://github.com/ClickHouse/ClickHouse/issues/29698). [#34764](https://github.com/ClickHouse/ClickHouse/pull/34764) ([flynn](https://github.com/ucasfl)). +* Support `remote()`/`cluster()` table functions for `parallel_distributed_insert_select`. [#34728](https://github.com/ClickHouse/ClickHouse/pull/34728) ([Azat Khuzhin](https://github.com/azat)). +* Do not reset logging that configured via `--log-file`/`--errorlog-file` command line options in case of empty configuration in the config file. [#34718](https://github.com/ClickHouse/ClickHouse/pull/34718) ([Amos Bird](https://github.com/amosbird)). +* Extract schema only once on table creation and prevent reading from local files/external sources to extract schema on each server startup. [#34684](https://github.com/ClickHouse/ClickHouse/pull/34684) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow specifying argument names for executable UDFs. This is necessary for formats where argument name is part of serialization, like `Native`, `JSONEachRow`. Closes [#34604](https://github.com/ClickHouse/ClickHouse/issues/34604). [#34653](https://github.com/ClickHouse/ClickHouse/pull/34653) ([Maksim Kita](https://github.com/kitaisreal)). +* `MaterializedMySQL` (experimental feature) now supports `materialized_mysql_tables_list` (a comma-separated list of MySQL database tables, which will be replicated by the MaterializedMySQL database engine. Default value: empty list — means all the tables will be replicated), mentioned at [#32977](https://github.com/ClickHouse/ClickHouse/issues/32977). [#34487](https://github.com/ClickHouse/ClickHouse/pull/34487) ([zzsmdfj](https://github.com/zzsmdfj)). +* Improve OpenTelemetry span logs for INSERT operation on distributed table. [#34480](https://github.com/ClickHouse/ClickHouse/pull/34480) ([Frank Chen](https://github.com/FrankChen021)). +* Make the znode `ctime` and `mtime` consistent between servers in ClickHouse Keeper. [#33441](https://github.com/ClickHouse/ClickHouse/pull/33441) ([小路](https://github.com/nicelulu)). + +#### Build/Testing/Packaging Improvement + +* Package repository is migrated to JFrog Artifactory (**Mikhail f. Shiryaev**). +* Randomize some settings in functional tests, so more possible combinations of settings will be tested. This is yet another fuzzing method to ensure better test coverage. 
This closes [#32268](https://github.com/ClickHouse/ClickHouse/issues/32268). [#34092](https://github.com/ClickHouse/ClickHouse/pull/34092) ([Kruglov Pavel](https://github.com/Avogar)).
+* Drop PVS-Studio from our CI. [#34680](https://github.com/ClickHouse/ClickHouse/pull/34680) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add the ability to build stripped binaries with CMake. In previous versions this was performed by dh-tools (an illustrative CMake sketch of the approach is shown just before the `CMakeLists.txt` diff below). [#35196](https://github.com/ClickHouse/ClickHouse/pull/35196) ([alesapin](https://github.com/alesapin)).
+* Smaller "fat-free" `clickhouse-keeper` build. [#35031](https://github.com/ClickHouse/ClickHouse/pull/35031) ([alesapin](https://github.com/alesapin)).
+* Use @robot-clickhouse as an author and committer for PRs like https://github.com/ClickHouse/ClickHouse/pull/34685. [#34793](https://github.com/ClickHouse/ClickHouse/pull/34793) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Limit the DWARF version for debug info to 4 at most, because our internal stack symbolizer cannot parse DWARF version 5. This matters if you compile ClickHouse with clang-15. [#34777](https://github.com/ClickHouse/ClickHouse/pull/34777) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove the `clickhouse-test` debian package as an unneeded complication. CI uses tests from the repository, and standalone testing via the deb package is no longer supported. [#34606](https://github.com/ClickHouse/ClickHouse/pull/34606) ([Ilya Yatsishin](https://github.com/qoega)).
+
+#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
+
+* A fix for HDFS integration: when the inner buffer size is too small, `NEED_MORE_INPUT` in `HadoopSnappyDecoder` runs multiple times (>= 3) for one compressed block, which caused the input data to be copied into the wrong place in `HadoopSnappyDecoder::buffer`. [#35116](https://github.com/ClickHouse/ClickHouse/pull/35116) ([lgbo](https://github.com/lgbo-ustc)).
+* Ignore obsolete grants in ATTACH GRANT statements. This PR fixes [#34815](https://github.com/ClickHouse/ClickHouse/issues/34815). [#34855](https://github.com/ClickHouse/ClickHouse/pull/34855) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix a segfault in the Postgres database engine when getting the create table query if the database was created using named collections. Closes [#35312](https://github.com/ClickHouse/ClickHouse/issues/35312). [#35313](https://github.com/ClickHouse/ClickHouse/pull/35313) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix a partial merge join bug that produced duplicate rows, closes [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). [#35311](https://github.com/ClickHouse/ClickHouse/pull/35311) ([Vladimir C](https://github.com/vdimir)).
+* Fix possible `Assertion 'position() != working_buffer.end()' failed` while using bzip2 compression with a small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35300](https://github.com/ClickHouse/ClickHouse/pull/35300) ([Kruglov Pavel](https://github.com/Avogar)). The same issue was fixed for lz4 compression with a small `max_read_buffer_size` setting value [#35296](https://github.com/ClickHouse/ClickHouse/pull/35296) ([Kruglov Pavel](https://github.com/Avogar)), for lzma compression with a small `max_read_buffer_size` setting value [#35295](https://github.com/ClickHouse/ClickHouse/pull/35295) ([Kruglov Pavel](https://github.com/Avogar)), and for `brotli` compression with a small `max_read_buffer_size` setting value.
The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35281](https://github.com/ClickHouse/ClickHouse/pull/35281) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix a possible segfault in `JSONEachRow` schema inference. [#35291](https://github.com/ClickHouse/ClickHouse/pull/35291) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix the `CHECK TABLE` query in the case when sparse columns are enabled in the table. [#35274](https://github.com/ClickHouse/ClickHouse/pull/35274) ([Anton Popov](https://github.com/CurtizJ)).
+* Avoid `std::terminate` in case of an exception while reading from remote VFS. [#35257](https://github.com/ClickHouse/ClickHouse/pull/35257) ([Azat Khuzhin](https://github.com/azat)).
+* Fix reading the port from config, closes [#34776](https://github.com/ClickHouse/ClickHouse/issues/34776). [#35193](https://github.com/ClickHouse/ClickHouse/pull/35193) ([Vladimir C](https://github.com/vdimir)).
+* Fix an error in queries with `WITH TOTALS` in case `HAVING` returns an empty result. This fixes [#33711](https://github.com/ClickHouse/ClickHouse/issues/33711). [#35186](https://github.com/ClickHouse/ClickHouse/pull/35186) ([Amos Bird](https://github.com/amosbird)).
+* Fix a corner case of `replaceRegexpAll`, closes [#35117](https://github.com/ClickHouse/ClickHouse/issues/35117). [#35182](https://github.com/ClickHouse/ClickHouse/pull/35182) ([Vladimir C](https://github.com/vdimir)).
+* Schema inference didn't work properly in the case of `INSERT INTO FUNCTION s3(...) FROM ...`: it tried to read the schema from the s3 file instead of from the select query. [#35176](https://github.com/ClickHouse/ClickHouse/pull/35176) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix MaterializedPostgreSQL (experimental feature) `table overrides` for partition by, etc. Closes [#35048](https://github.com/ClickHouse/ClickHouse/issues/35048). [#35162](https://github.com/ClickHouse/ClickHouse/pull/35162) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix MaterializedPostgreSQL (experimental feature) adding a new table to replication (ATTACH TABLE) after manually removing it (DETACH TABLE). Closes [#33800](https://github.com/ClickHouse/ClickHouse/issues/33800). Closes [#34922](https://github.com/ClickHouse/ClickHouse/issues/34922). Closes [#34315](https://github.com/ClickHouse/ClickHouse/issues/34315). [#35158](https://github.com/ClickHouse/ClickHouse/pull/35158) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix a partition pruning error when a non-monotonic function is used with the IN operator. This fixes [#35136](https://github.com/ClickHouse/ClickHouse/issues/35136). [#35146](https://github.com/ClickHouse/ClickHouse/pull/35146) ([Amos Bird](https://github.com/amosbird)).
+* Fixed slightly incorrect translation of YAML configs to XML. [#35135](https://github.com/ClickHouse/ClickHouse/pull/35135) ([Miel Donkers](https://github.com/mdonkers)).
+* Fix `optimize_skip_unused_shards_rewrite_in` for signed columns and negative values. [#35134](https://github.com/ClickHouse/ClickHouse/pull/35134) ([Azat Khuzhin](https://github.com/azat)).
+* The `update_lag` external dictionary configuration option was unusable, showing the error message ``Unexpected key `update_lag` in dictionary source configuration``. [#35089](https://github.com/ClickHouse/ClickHouse/pull/35089) ([Jason Chu](https://github.com/1lann)).
+* Avoid a possible deadlock on server shutdown. [#35081](https://github.com/ClickHouse/ClickHouse/pull/35081) ([Azat Khuzhin](https://github.com/azat)).
+* Fix a missing alias after a function is optimized to a subcolumn when the setting `optimize_functions_to_subcolumns` is enabled. Closes [#33798](https://github.com/ClickHouse/ClickHouse/issues/33798). [#35079](https://github.com/ClickHouse/ClickHouse/pull/35079) ([qieqieplus](https://github.com/qieqieplus)).
+* Fix reading from the `system.asynchronous_inserts` table if there is an asynchronous insert into a table function. [#35050](https://github.com/ClickHouse/ClickHouse/pull/35050) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix the possible exception `Reading for MergeTree family tables must be done with last position boundary` (relevant to operation on remote VFS). Closes [#34979](https://github.com/ClickHouse/ClickHouse/issues/34979). [#35001](https://github.com/ClickHouse/ClickHouse/pull/35001) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix an unexpected result when using a `-State` aggregate function in a window frame. [#34999](https://github.com/ClickHouse/ClickHouse/pull/34999) ([metahys](https://github.com/metahys)).
+* Fix a possible segfault in FileLog (experimental feature). Closes [#30749](https://github.com/ClickHouse/ClickHouse/issues/30749). [#34996](https://github.com/ClickHouse/ClickHouse/pull/34996) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix the possible rare error `Cannot push block to port which already has data`. [#34993](https://github.com/ClickHouse/ClickHouse/pull/34993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix wrong schema inference for unquoted dates in CSV. Closes [#34768](https://github.com/ClickHouse/ClickHouse/issues/34768). [#34961](https://github.com/ClickHouse/ClickHouse/pull/34961) ([Kruglov Pavel](https://github.com/Avogar)).
+* Integration with Hive: fix an unexpected result when using `in` in `where` in a Hive query. [#34945](https://github.com/ClickHouse/ClickHouse/pull/34945) ([lgbo](https://github.com/lgbo-ustc)).
+* Avoid busy polling in ClickHouse Keeper while searching for changelog files to delete. [#34931](https://github.com/ClickHouse/ClickHouse/pull/34931) ([Azat Khuzhin](https://github.com/azat)).
+* Fix DateTime64 conversion from PostgreSQL. Closes [#33364](https://github.com/ClickHouse/ClickHouse/issues/33364). [#34910](https://github.com/ClickHouse/ClickHouse/pull/34910) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix a possible "Part directory doesn't exist" error during `INSERT` into a MergeTree table backed by VFS over s3. [#34876](https://github.com/ClickHouse/ClickHouse/pull/34876) ([Azat Khuzhin](https://github.com/azat)).
+* Support executing DDLs like CREATE USER on a cross-replicated cluster. [#34860](https://github.com/ClickHouse/ClickHouse/pull/34860) ([Jianmei Zhang](https://github.com/zhangjmruc)).
+* Fix bugs with GROUP BY on multiple columns in `WindowView` (experimental feature). [#34859](https://github.com/ClickHouse/ClickHouse/pull/34859) ([vxider](https://github.com/Vxider)).
+* Fix possible failures in S2 functions when queries contain const columns. [#34745](https://github.com/ClickHouse/ClickHouse/pull/34745) ([Bharat Nallan](https://github.com/bharatnc)).
+* Fix a bug in H3 functions with const columns which caused queries to fail. [#34743](https://github.com/ClickHouse/ClickHouse/pull/34743) ([Bharat Nallan](https://github.com/bharatnc)).
+* Fix `No such file or directory` with enabled `fsync_part_directory` and vertical merge. [#34739](https://github.com/ClickHouse/ClickHouse/pull/34739) ([Azat Khuzhin](https://github.com/azat)).
+* Fix serialization/printing for the system queries `RELOAD MODEL`, `RELOAD FUNCTION`, `RESTART DISK` when used with `ON CLUSTER`. Closes [#34514](https://github.com/ClickHouse/ClickHouse/issues/34514). [#34696](https://github.com/ClickHouse/ClickHouse/pull/34696) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix `allow_experimental_projection_optimization` with `enable_global_with_statement` (before, it could lead to a `Stack size too large` error in case of multiple expressions in the `WITH` clause, and it also executed scalar subqueries again and again, so now it is more optimal). [#34650](https://github.com/ClickHouse/ClickHouse/pull/34650) ([Azat Khuzhin](https://github.com/azat)).
+* Stop selecting a part for mutation when the other replica has already updated the transaction log for the `ReplicatedMergeTree` engine. [#34633](https://github.com/ClickHouse/ClickHouse/pull/34633) ([Jianmei Zhang](https://github.com/zhangjmruc)).
+* Fix an incorrect result of a trivial count query when the part movement feature is used [#34089](https://github.com/ClickHouse/ClickHouse/issues/34089). [#34385](https://github.com/ClickHouse/ClickHouse/pull/34385) ([nvartolomei](https://github.com/nvartolomei)).
+* Fix inconsistency of the `max_query_size` limitation in distributed subqueries. [#34078](https://github.com/ClickHouse/ClickHouse/pull/34078) ([Chao Ma](https://github.com/godliness)).
+ + +### ClickHouse release v22.2, 2022-02-17 #### Upgrade Notes @@ -174,7 +309,7 @@ * This PR allows using multiple LDAP storages in the same list of user directories. It worked earlier but was broken because LDAP tests are disabled (they are part of the testflows tests). [#33574](https://github.com/ClickHouse/ClickHouse/pull/33574) ([Vitaly Baranov](https://github.com/vitlibar)). -### ClickHouse release v22.1, 2022-01-18 +### ClickHouse release v22.1, 2022-01-18 #### Upgrade Notes @@ -232,7 +367,7 @@ #### Improvement -* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch. +* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch. * Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Improve `Bool` type serialization and deserialization, check the range of values. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)). * If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)).
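The changelog entry above about building stripped binaries with CMake corresponds to the `cmake/strip_binary.cmake` and `CMakeLists.txt` changes further down in this diff. The following is a minimal, illustrative CMake sketch of the same idea (an option-gated post-build step that splits debug info into a separate file); the target name `my_tool`, the option name `EXAMPLE_INSTALL_STRIPPED`, and the output layout are hypothetical and not part of the patch:

```cmake
# Hypothetical example: not the actual ClickHouse macro, just the general technique.
option(EXAMPLE_INSTALL_STRIPPED "Strip the example binary and keep debug info in a separate .debug file" OFF)

if (EXAMPLE_INSTALL_STRIPPED)
    find_program(OBJCOPY_PATH NAMES llvm-objcopy objcopy)
    find_program(STRIP_PATH NAMES llvm-strip strip)
    if (NOT OBJCOPY_PATH OR NOT STRIP_PATH)
        message(FATAL_ERROR "objcopy and strip are required to split out debug info")
    endif()

    add_custom_command(TARGET my_tool POST_BUILD
        # 1. Copy the full debug info into a standalone .debug file.
        COMMAND "${OBJCOPY_PATH}" --only-keep-debug "$<TARGET_FILE:my_tool>" "$<TARGET_FILE:my_tool>.debug"
        # 2. Strip the executable itself.
        COMMAND "${STRIP_PATH}" "$<TARGET_FILE:my_tool>"
        # 3. Record a debuglink so debuggers can locate the detached debug info.
        COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink="$<TARGET_FILE:my_tool>.debug" "$<TARGET_FILE:my_tool>"
        COMMENT "Stripping my_tool and splitting out debug info"
        VERBATIM)
endif()
```

With a setup along these lines, the installed binary stays small while debuggers can still resolve symbols through the debuglink, which is the effect the `INSTALL_STRIPPED_BINARIES` option in the diff below aims for.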
diff --git a/CMakeLists.txt b/CMakeLists.txt index 9649fc32d74..4f1a6c05730 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -222,6 +222,12 @@ else () set(NO_WHOLE_ARCHIVE --no-whole-archive) endif () +option(ENABLE_CURL_BUILD "Enable curl, azure, sentry build on by default except MacOS." ON) +if (OS_DARWIN) + # Disable the curl, azure, senry build on MacOS + set (ENABLE_CURL_BUILD OFF) +endif () + # Ignored if `lld` is used option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.") @@ -261,13 +267,16 @@ endif () # Add a section with the hash of the compiled machine code for integrity checks. # Only for official builds, because adding a section can be time consuming (rewrite of several GB). # And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary) -if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE)) - set (USE_BINARY_HASH 1) +if (OBJCOPY_PATH AND CLICKHOUSE_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE OR CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64.cmake$")) + set (USE_BINARY_HASH 1 CACHE STRING "Calculate binary hash and store it in the separate section") endif () # Allows to build stripped binary in a separate directory -if (OBJCOPY_PATH AND READELF_PATH) - set(BUILD_STRIPPED_BINARIES_PREFIX "" CACHE STRING "Build stripped binaries with debug info in separate directory") +if (OBJCOPY_PATH AND STRIP_PATH) + option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF) + if (INSTALL_STRIPPED_BINARIES) + set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information") + endif() endif() cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd @@ -291,14 +300,28 @@ include(cmake/cpu_features.cmake) # Enable it explicitly. set (COMPILER_FLAGS "${COMPILER_FLAGS} -fasynchronous-unwind-tables") -# Reproducible builds -# If turned `ON`, remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE(). -option(ENABLE_BUILD_PATH_MAPPING "Enable remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE(). It's to generate reproducible builds. See https://reproducible-builds.org/docs/build-path" ON) +# Reproducible builds. +if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") + set (ENABLE_BUILD_PATH_MAPPING_DEFAULT OFF) +else () + set (ENABLE_BUILD_PATH_MAPPING_DEFAULT ON) +endif () + +option (ENABLE_BUILD_PATH_MAPPING "Enable remapping of file source paths in debug info, predefined preprocessor macros, and __builtin_FILE(). It's used to generate reproducible builds. See https://reproducible-builds.org/docs/build-path" ${ENABLE_BUILD_PATH_MAPPING_DEFAULT}) if (ENABLE_BUILD_PATH_MAPPING) set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") -endif() +endif () + +option (ENABLE_BUILD_PROFILING "Enable profiling of build time" OFF) +if (ENABLE_BUILD_PROFILING) + if (COMPILER_CLANG) + set (COMPILER_FLAGS "${COMPILER_FLAGS} -ftime-trace") + else () + message (${RECONFIGURE_MESSAGE_LEVEL} "Build profiling is only available with CLang") + endif () +endif () if (${CMAKE_VERSION} VERSION_LESS "3.12.4") # CMake < 3.12 doesn't support setting 20 as a C++ standard version. 
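A note on the `CMakeLists.txt` hunk above: the recurring pattern there is to compute a default first (based on build type or compiler) and only then declare the user-visible `option()`. Below is a minimal sketch of that pattern, assuming the ClickHouse-style helper variables `CMAKE_BUILD_TYPE_UC`, `COMPILER_CLANG`, and `COMPILER_FLAGS` that the patch itself uses; the option name `ENABLE_MY_FEATURE` is made up purely for illustration:

```cmake
# Illustrative pattern only; ENABLE_MY_FEATURE is a hypothetical option.
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
    set (ENABLE_MY_FEATURE_DEFAULT OFF)  # e.g. keep debug builds fast and easy to debug
else ()
    set (ENABLE_MY_FEATURE_DEFAULT ON)
endif ()

option (ENABLE_MY_FEATURE "Example feature toggle with a build-type-dependent default" ${ENABLE_MY_FEATURE_DEFAULT})

if (ENABLE_MY_FEATURE)
    if (COMPILER_CLANG)
        # Append compiler flags the same way ENABLE_BUILD_PROFILING appends -ftime-trace.
        set (COMPILER_FLAGS "${COMPILER_FLAGS} -ftime-trace")
    else ()
        message (WARNING "ENABLE_MY_FEATURE is only supported with Clang in this sketch")
    endif ()
endif ()
```

A user would then opt in or out at configure time, e.g. `cmake -DENABLE_BUILD_PROFILING=ON ..` for the real option added above.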
diff --git a/SECURITY.md b/SECURITY.md index ca3c8b439fd..6c03a6bb945 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -15,7 +15,7 @@ The following versions of ClickHouse server are currently being supported with s | 20.x | :x: | | 21.1 | :x: | | 21.2 | :x: | -| 21.3 | ✅ | +| 21.3 | :x: | | 21.4 | :x: | | 21.5 | :x: | | 21.6 | :x: | @@ -23,9 +23,11 @@ The following versions of ClickHouse server are currently being supported with s | 21.8 | ✅ | | 21.9 | :x: | | 21.10 | :x: | -| 21.11 | ✅ | -| 21.12 | ✅ | +| 21.11 | :x: | +| 21.12 | :x: | | 22.1 | ✅ | +| 22.2 | ✅ | +| 22.3 | ✅ | ## Reporting a Vulnerability diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 8a1ca6064cb..3cfd2f6906a 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -2,6 +2,7 @@ set (SRCS argsToConfig.cpp coverage.cpp demangle.cpp + getAvailableMemoryAmount.cpp getFQDNOrHostName.cpp getMemoryAmount.cpp getPageSize.cpp diff --git a/base/base/getAvailableMemoryAmount.cpp b/base/base/getAvailableMemoryAmount.cpp new file mode 100644 index 00000000000..e9bbbd95caf --- /dev/null +++ b/base/base/getAvailableMemoryAmount.cpp @@ -0,0 +1,44 @@ +#include +#include +#include +#include + +#include +#include +#include +#if defined(BSD) +#include +#include +#endif + + +uint64_t getAvailableMemoryAmountOrZero() +{ +#if defined(_SC_PHYS_PAGES) // linux + return getPageSize() * sysconf(_SC_PHYS_PAGES); +#elif defined(__FreeBSD__) + struct vmtotal vmt; + size_t vmt_size = sizeof(vmt); + if (sysctlbyname("vm.vmtotal", &vmt, &vmt_size, NULL, 0) == 0) + return getPageSize() * vmt.t_avm; + else + return 0; +#else // darwin + unsigned int usermem; + size_t len = sizeof(usermem); + static int mib[2] = { CTL_HW, HW_USERMEM }; + if (sysctl(mib, 2, &usermem, &len, nullptr, 0) == 0 && len == sizeof(usermem)) + return usermem; + else + return 0; +#endif +} + + +uint64_t getAvailableMemoryAmount() +{ + auto res = getAvailableMemoryAmountOrZero(); + if (!res) + throw std::runtime_error("Cannot determine available memory amount"); + return res; +} diff --git a/base/base/getAvailableMemoryAmount.h b/base/base/getAvailableMemoryAmount.h new file mode 100644 index 00000000000..44612945016 --- /dev/null +++ b/base/base/getAvailableMemoryAmount.h @@ -0,0 +1,12 @@ +#pragma once + +#include + +/** Returns the size of currently available physical memory (RAM) in bytes. + * Returns 0 on unsupported platform or if it cannot determine the size of physical memory. + */ +uint64_t getAvailableMemoryAmountOrZero(); + +/** Throws exception if it cannot determine the size of physical memory. + */ +uint64_t getAvailableMemoryAmount(); diff --git a/base/base/phdr_cache.cpp b/base/base/phdr_cache.cpp index 20a755ed7a4..36a9b4f1f62 100644 --- a/base/base/phdr_cache.cpp +++ b/base/base/phdr_cache.cpp @@ -2,7 +2,7 @@ #pragma clang diagnostic ignored "-Wreserved-identifier" #endif -/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex. +/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/ #include diff --git a/base/base/phdr_cache.h b/base/base/phdr_cache.h index d2854ece0bc..b522710c4c4 100644 --- a/base/base/phdr_cache.h +++ b/base/base/phdr_cache.h @@ -1,6 +1,6 @@ #pragma once -/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex. 
+/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/ /** Collects all dl_phdr_info items and caches them in a static array. * Also rewrites dl_iterate_phdr with a lock-free version which consults the above cache diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 311349a2ba7..b27a904b31a 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -828,7 +828,6 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() /// Setup signal handlers. /// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime. - addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, &handled_signals); addSignalHandler({SIGHUP}, closeLogsSignalHandler, &handled_signals); addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals); diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 54a74369dce..152a431922c 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -76,10 +76,10 @@ public: /// return none if daemon doesn't exist, reference to the daemon otherwise static std::optional> tryGetInstance() { return tryGetInstance(); } - /// В Graphite компоненты пути(папки) разделяются точкой. - /// У нас принят путь формата root_path.hostname_yandex_ru.key - /// root_path по умолчанию one_min - /// key - лучше группировать по смыслу. Например "meminfo.cached" или "meminfo.free", "meminfo.total" + /// Graphite metric name has components separated by dots. + /// We used to have the following format: root_path.hostname_clickhouse_com.key + /// root_path - one_min by default + /// key - something that makes sense. Examples: "meminfo.cached" or "meminfo.free", "meminfo.total". 
template void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "") { diff --git a/base/glibc-compatibility/CMakeLists.txt b/base/glibc-compatibility/CMakeLists.txt index ddec09121e1..ef7ec6d7fc0 100644 --- a/base/glibc-compatibility/CMakeLists.txt +++ b/base/glibc-compatibility/CMakeLists.txt @@ -51,6 +51,6 @@ if (GLIBC_COMPATIBILITY) message (STATUS "Some symbols from glibc will be replaced for compatibility") -elseif (YANDEX_OFFICIAL_BUILD) +elseif (CLICKHOUSE_OFFICIAL_BUILD) message (WARNING "Option GLIBC_COMPATIBILITY must be turned on for production builds.") endif () diff --git a/base/loggers/Loggers.cpp b/base/loggers/Loggers.cpp index 7c627ad2272..512e44f79c7 100644 --- a/base/loggers/Loggers.cpp +++ b/base/loggers/Loggers.cpp @@ -197,7 +197,6 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log Poco::AutoPtr pf = new OwnPatternFormatter(color_enabled); Poco::AutoPtr log = new DB::OwnFormattingChannel(pf, new Poco::ConsoleChannel); - logger.warning("Logging " + console_log_level_string + " to console"); log->setLevel(console_log_level); split->addChannel(log, "console"); } diff --git a/benchmark/greenplum/result_parser.py b/benchmark/greenplum/result_parser.py index 8af20d265a0..4ed1aa5c4a5 100755 --- a/benchmark/greenplum/result_parser.py +++ b/benchmark/greenplum/result_parser.py @@ -4,11 +4,12 @@ import sys import json + def parse_block(block=[], options=[]): - #print('block is here', block) - #show_query = False - #show_query = options.show_query + # print('block is here', block) + # show_query = False + # show_query = options.show_query result = [] query = block[0].strip() if len(block) > 4: @@ -20,9 +21,9 @@ def parse_block(block=[], options=[]): timing2 = block[2].strip().split()[1] timing3 = block[3].strip().split()[1] if options.show_queries: - result.append( query ) + result.append(query) if not options.show_first_timings: - result += [ timing1 , timing2, timing3 ] + result += [timing1, timing2, timing3] else: result.append(timing1) return result @@ -37,12 +38,12 @@ def read_stats_file(options, fname): for line in f.readlines(): - if 'SELECT' in line: + if "SELECT" in line: if len(block) > 1: - result.append( parse_block(block, options) ) - block = [ line ] - elif 'Time:' in line: - block.append( line ) + result.append(parse_block(block, options)) + block = [line] + elif "Time:" in line: + block.append(line) return result @@ -50,7 +51,7 @@ def read_stats_file(options, fname): def compare_stats_files(options, arguments): result = [] file_output = [] - pyplot_colors = ['y', 'b', 'g', 'r'] + pyplot_colors = ["y", "b", "g", "r"] for fname in arguments[1:]: file_output.append((read_stats_file(options, fname))) if len(file_output[0]) > 0: @@ -58,65 +59,92 @@ def compare_stats_files(options, arguments): for idx, data_set in enumerate(file_output): int_result = [] for timing in data_set: - int_result.append(float(timing[0])) #y values - result.append([[x for x in range(0, len(int_result)) ], int_result, -pyplot_colors[idx] + '^' ] ) -# result.append([x for x in range(1, len(int_result)) ]) #x values -# result.append( pyplot_colors[idx] + '^' ) + int_result.append(float(timing[0])) # y values + result.append( + [ + [x for x in range(0, len(int_result))], + int_result, + pyplot_colors[idx] + "^", + ] + ) + # result.append([x for x in range(1, len(int_result)) ]) #x values + # result.append( pyplot_colors[idx] + '^' ) 
return result + def parse_args(): from optparse import OptionParser - parser = OptionParser(usage='usage: %prog [options] [result_file_path]..') - parser.add_option("-q", "--show-queries", help="Show statements along with timings", action="store_true", dest="show_queries") - parser.add_option("-f", "--show-first-timings", help="Show only first tries timings", action="store_true", dest="show_first_timings") - parser.add_option("-c", "--compare-mode", help="Prepare output for pyplot comparing result files.", action="store", dest="compare_mode") + + parser = OptionParser(usage="usage: %prog [options] [result_file_path]..") + parser.add_option( + "-q", + "--show-queries", + help="Show statements along with timings", + action="store_true", + dest="show_queries", + ) + parser.add_option( + "-f", + "--show-first-timings", + help="Show only first tries timings", + action="store_true", + dest="show_first_timings", + ) + parser.add_option( + "-c", + "--compare-mode", + help="Prepare output for pyplot comparing result files.", + action="store", + dest="compare_mode", + ) (options, arguments) = parser.parse_args(sys.argv) if len(arguments) < 2: parser.print_usage() sys.exit(1) - return ( options, arguments ) + return (options, arguments) + def gen_pyplot_code(options, arguments): - result = '' + result = "" data_sets = compare_stats_files(options, arguments) for idx, data_set in enumerate(data_sets, start=0): x_values, y_values, line_style = data_set - result += '\nplt.plot(' - result += '%s, %s, \'%s\'' % ( x_values, y_values, line_style ) - result += ', label=\'%s try\')' % idx - print('import matplotlib.pyplot as plt') + result += "\nplt.plot(" + result += "%s, %s, '%s'" % (x_values, y_values, line_style) + result += ", label='%s try')" % idx + print("import matplotlib.pyplot as plt") print(result) - print( 'plt.xlabel(\'Try number\')' ) - print( 'plt.ylabel(\'Timing\')' ) - print( 'plt.title(\'Benchmark query timings\')' ) - print('plt.legend()') - print('plt.show()') + print("plt.xlabel('Try number')") + print("plt.ylabel('Timing')") + print("plt.title('Benchmark query timings')") + print("plt.legend()") + print("plt.show()") def gen_html_json(options, arguments): tuples = read_stats_file(options, arguments[1]) - print('{') + print("{") print('"system: GreenPlum(x2),') - print(('"version": "%s",' % '4.3.9.1')) + print(('"version": "%s",' % "4.3.9.1")) print('"data_size": 10000000,') print('"time": "",') print('"comments": "",') print('"result":') - print('[') + print("[") for s in tuples: print(s) - print(']') - print('}') + print("]") + print("}") def main(): - ( options, arguments ) = parse_args() + (options, arguments) = parse_args() if len(arguments) > 2: gen_pyplot_code(options, arguments) else: gen_html_json(options, arguments) -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 8de2de1e3bb..28ce5c82a92 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54460) +SET(VERSION_REVISION 54461) SET(VERSION_MAJOR 22) -SET(VERSION_MINOR 3) +SET(VERSION_MINOR 4) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 75366fc95e510b7ac76759ef670702ae5f488a51) -SET(VERSION_DESCRIBE v22.3.1.1-testing) -SET(VERSION_STRING 22.3.1.1) +SET(VERSION_GITHASH 92ab33f560e638d1989c5ca543021ab53d110f5c) +SET(VERSION_DESCRIBE v22.4.1.1-testing) +SET(VERSION_STRING 22.4.1.1) # end of autochange diff --git a/cmake/strip.sh b/cmake/strip.sh deleted file mode 100755 index de596887159..00000000000 --- a/cmake/strip.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash - -BINARY_PATH=$1 -BINARY_NAME=$(basename $BINARY_PATH) -DESTINATION_STRIPPED_DIR=$2 -OBJCOPY_PATH=${3:objcopy} -READELF_PATH=${4:readelf} - -BUILD_ID=$($READELF_PATH -n $1 | sed -n '/Build ID/ { s/.*: //p; q; }') -BUILD_ID_PREFIX=${BUILD_ID:0:2} -BUILD_ID_SUFFIX=${BUILD_ID:2} -TEMP_BINARY_PATH="${BINARY_PATH}_temp" - -DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id" -DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin" - -mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX" -mkdir -p "$DESTINATION_STRIP_BINARY_DIR" - -$OBJCOPY_PATH --only-keep-debug "$BINARY_PATH" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" - -touch "$TEMP_BINARY_PATH" -$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$BINARY_PATH" "$TEMP_BINARY_PATH" -$OBJCOPY_PATH --strip-all "$TEMP_BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" -rm -f "$TEMP_BINARY_PATH" diff --git a/cmake/strip_binary.cmake b/cmake/strip_binary.cmake index e430807772d..2d6a3888503 100644 --- a/cmake/strip_binary.cmake +++ b/cmake/strip_binary.cmake @@ -11,16 +11,43 @@ macro(clickhouse_strip_binary) message(FATAL_ERROR "A binary path name must be provided for stripping binary") endif() - if (NOT DEFINED STRIP_DESTINATION_DIR) message(FATAL_ERROR "Destination directory for stripped binary must be provided") endif() add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD - COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH} - COMMENT "Stripping clickhouse binary" VERBATIM + COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin" + COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin" + COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" + COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" + COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMENT "Stripping clickhouse binary" VERBATIM ) install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) + install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}/${STRIP_TARGET}.debug COMPONENT clickhouse) +endmacro() + + +macro(clickhouse_make_empty_debug_info_for_nfpm) + set(oneValueArgs TARGET 
DESTINATION_DIR) + cmake_parse_arguments(EMPTY_DEBUG "" "${oneValueArgs}" "" ${ARGN}) + + if (NOT DEFINED EMPTY_DEBUG_TARGET) + message(FATAL_ERROR "A target name must be provided for stripping binary") + endif() + + if (NOT DEFINED EMPTY_DEBUG_DESTINATION_DIR) + message(FATAL_ERROR "Destination directory for empty debug must be provided") + endif() + + add_custom_command(TARGET ${EMPTY_DEBUG_TARGET} POST_BUILD + COMMAND mkdir -p "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug" + COMMAND touch "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" + COMMENT "Adding empty debug info for NFPM" VERBATIM + ) + + install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}" COMPONENT clickhouse) endmacro() diff --git a/cmake/tools.cmake b/cmake/tools.cmake index d6fddd0509e..d571a46ad26 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -170,32 +170,32 @@ else () message (FATAL_ERROR "Cannot find objcopy.") endif () -# Readelf (FIXME copypaste) +# Strip (FIXME copypaste) if (COMPILER_GCC) - find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf") + find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-13" "llvm-strip-12" "llvm-strip-11" "strip") else () - find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf") + find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip") endif () -if (NOT READELF_PATH AND OS_DARWIN) +if (NOT STRIP_PATH AND OS_DARWIN) find_program (BREW_PATH NAMES "brew") if (BREW_PATH) execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX) if (LLVM_PREFIX) - find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH) + find_program (STRIP_PATH NAMES "llvm-strip" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH) endif () - if (NOT READELF_PATH) + if (NOT STRIP_PATH) execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX) if (BINUTILS_PREFIX) - find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH) + find_program (STRIP_PATH NAMES "strip" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH) endif () endif () endif () endif () -if (READELF_PATH) - message (STATUS "Using readelf: ${READELF_PATH}") +if (STRIP_PATH) + message (STATUS "Using strip: ${STRIP_PATH}") else () - message (FATAL_ERROR "Cannot find readelf.") + message (FATAL_ERROR "Cannot find strip.") endif () diff --git a/cmake/version.cmake b/cmake/version.cmake index 963f291c0f3..acaa772ff2f 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -18,6 +18,6 @@ set (VERSION_STRING_SHORT "${VERSION_MAJOR}.${VERSION_MINOR}") math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000") -if(YANDEX_OFFICIAL_BUILD) +if(CLICKHOUSE_OFFICIAL_BUILD) set(VERSION_OFFICIAL " (official build)") endif() diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 9cf307c473e..1f03c0fd341 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -119,9 +119,13 @@ add_contrib (fastops-cmake fastops) add_contrib (libuv-cmake libuv) add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv add_contrib (cassandra-cmake cassandra) # requires: libuv -add_contrib (curl-cmake curl) -add_contrib (azure-cmake azure) -add_contrib 
(sentry-native-cmake sentry-native) # requires: curl + +if (ENABLE_CURL_BUILD) + add_contrib (curl-cmake curl) + add_contrib (azure-cmake azure) + add_contrib (sentry-native-cmake sentry-native) # requires: curl +endif() + add_contrib (fmtlib-cmake fmtlib) add_contrib (krb5-cmake krb5) add_contrib (cyrus-sasl-cmake cyrus-sasl) # for krb5 diff --git a/contrib/arrow b/contrib/arrow index 1d9cc51daa4..efdcd015cfd 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 1d9cc51daa4e7e9fc6926320ef73759818bd736e +Subproject commit efdcd015cfdee1b6aa349c9ca227ca12c3d697f5 diff --git a/contrib/avro-cmake/CMakeLists.txt b/contrib/avro-cmake/CMakeLists.txt index d91ce40dd54..c5bda41782d 100644 --- a/contrib/avro-cmake/CMakeLists.txt +++ b/contrib/avro-cmake/CMakeLists.txt @@ -69,9 +69,10 @@ endif () target_compile_options(_avrocpp PRIVATE ${SUPPRESS_WARNINGS}) # create a symlink to include headers with +set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include") ADD_CUSTOM_TARGET(avro_symlink_headers ALL - COMMAND ${CMAKE_COMMAND} -E make_directory "${AVROCPP_ROOT_DIR}/include" - COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVROCPP_ROOT_DIR}/include/avro" + COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}" + COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro" ) add_dependencies(_avrocpp avro_symlink_headers) -target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVROCPP_ROOT_DIR}/include") +target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}") diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 0215c68e683..3d66bc97971 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -27,7 +27,11 @@ target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRAR # asio -target_compile_definitions (_boost_headers_only INTERFACE BOOST_ASIO_STANDALONE=1) +target_compile_definitions (_boost_headers_only INTERFACE + BOOST_ASIO_STANDALONE=1 + # Avoid using of deprecated in c++ > 17 std::result_of + BOOST_ASIO_HAS_STD_INVOKE_RESULT=1 +) # iostreams diff --git a/contrib/curl b/contrib/curl index 3b8bbbbd160..801bd5138ce 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit 3b8bbbbd1609c638a3d3d0acb148a33dedb67be3 +Subproject commit 801bd5138ce31aa0d906fa4e2eabfc599d74e793 diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index 589f40384e3..b1e1a0ded8a 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -32,7 +32,6 @@ set (SRCS "${LIBRARY_DIR}/lib/transfer.c" "${LIBRARY_DIR}/lib/strcase.c" "${LIBRARY_DIR}/lib/easy.c" - "${LIBRARY_DIR}/lib/security.c" "${LIBRARY_DIR}/lib/curl_fnmatch.c" "${LIBRARY_DIR}/lib/fileinfo.c" "${LIBRARY_DIR}/lib/wildcard.c" @@ -115,6 +114,12 @@ set (SRCS "${LIBRARY_DIR}/lib/curl_get_line.c" "${LIBRARY_DIR}/lib/altsvc.c" "${LIBRARY_DIR}/lib/socketpair.c" + "${LIBRARY_DIR}/lib/bufref.c" + "${LIBRARY_DIR}/lib/dynbuf.c" + "${LIBRARY_DIR}/lib/hsts.c" + "${LIBRARY_DIR}/lib/http_aws_sigv4.c" + "${LIBRARY_DIR}/lib/mqtt.c" + "${LIBRARY_DIR}/lib/rename.c" "${LIBRARY_DIR}/lib/vauth/vauth.c" "${LIBRARY_DIR}/lib/vauth/cleartext.c" "${LIBRARY_DIR}/lib/vauth/cram.c" @@ -131,8 +136,6 @@ set (SRCS "${LIBRARY_DIR}/lib/vtls/gtls.c" "${LIBRARY_DIR}/lib/vtls/vtls.c" "${LIBRARY_DIR}/lib/vtls/nss.c" - "${LIBRARY_DIR}/lib/vtls/polarssl.c" - "${LIBRARY_DIR}/lib/vtls/polarssl_threadlock.c" 
"${LIBRARY_DIR}/lib/vtls/wolfssl.c" "${LIBRARY_DIR}/lib/vtls/schannel.c" "${LIBRARY_DIR}/lib/vtls/schannel_verify.c" @@ -141,6 +144,7 @@ set (SRCS "${LIBRARY_DIR}/lib/vtls/mbedtls.c" "${LIBRARY_DIR}/lib/vtls/mesalink.c" "${LIBRARY_DIR}/lib/vtls/bearssl.c" + "${LIBRARY_DIR}/lib/vtls/keylog.c" "${LIBRARY_DIR}/lib/vquic/ngtcp2.c" "${LIBRARY_DIR}/lib/vquic/quiche.c" "${LIBRARY_DIR}/lib/vssh/libssh2.c" diff --git a/contrib/hyperscan b/contrib/hyperscan index e9f08df0213..5edc68c5ac6 160000 --- a/contrib/hyperscan +++ b/contrib/hyperscan @@ -1 +1 @@ -Subproject commit e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa +Subproject commit 5edc68c5ac68d2d4f876159e9ee84def6d3dc87c diff --git a/contrib/krb5-cmake/CMakeLists.txt b/contrib/krb5-cmake/CMakeLists.txt index 685e8737ef0..214d23bc2a9 100644 --- a/contrib/krb5-cmake/CMakeLists.txt +++ b/contrib/krb5-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -set (ENABLE_KRB5_DEFAULT 1) +set (ENABLE_KRB5_DEFAULT ${ENABLE_LIBRARIES}) if (NOT CMAKE_SYSTEM_NAME MATCHES "Linux" AND NOT (CMAKE_SYSTEM_NAME MATCHES "Darwin" AND NOT CMAKE_CROSSCOMPILING)) message (WARNING "krb5 disabled in non-Linux and non-native-Darwin environments") set (ENABLE_KRB5_DEFAULT 0) @@ -16,6 +16,7 @@ if(NOT AWK_PROGRAM) endif() set(KRB5_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/krb5/src") +set(KRB5_ET_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/include_private") set(ALL_SRCS "${KRB5_SOURCE_DIR}/util/et/et_name.c" @@ -90,7 +91,6 @@ set(ALL_SRCS "${KRB5_SOURCE_DIR}/lib/gssapi/krb5/get_tkt_flags.c" "${KRB5_SOURCE_DIR}/lib/gssapi/krb5/set_allowable_enctypes.c" "${KRB5_SOURCE_DIR}/lib/gssapi/krb5/k5sealiov.c" - "${KRB5_SOURCE_DIR}/lib/gssapi/krb5/gssapi_err_krb5.c" "${KRB5_SOURCE_DIR}/lib/gssapi/krb5/canon_name.c" "${KRB5_SOURCE_DIR}/lib/gssapi/krb5/inq_cred.c" "${KRB5_SOURCE_DIR}/lib/gssapi/krb5/export_sec_context.c" @@ -143,11 +143,12 @@ set(ALL_SRCS "${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_buffer_set.c" "${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_set.c" "${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_token.c" - "${KRB5_SOURCE_DIR}/lib/gssapi/generic/gssapi_err_generic.c" "${KRB5_SOURCE_DIR}/lib/gssapi/generic/disp_major_status.c" "${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_seqstate.c" "${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_errmap.c" "${KRB5_SOURCE_DIR}/lib/gssapi/generic/rel_buffer.c" + "${KRB5_ET_BIN_DIR}/lib/gssapi/krb5/gssapi_err_krb5.c" + "${KRB5_ET_BIN_DIR}/lib/gssapi/generic/gssapi_err_generic.c" "${KRB5_SOURCE_DIR}/lib/gssapi/spnego/spnego_mech.c" "${KRB5_SOURCE_DIR}/lib/gssapi/spnego/negoex_util.c" @@ -256,8 +257,8 @@ set(ALL_SRCS "${KRB5_SOURCE_DIR}/util/profile/prof_parse.c" "${KRB5_SOURCE_DIR}/util/profile/prof_get.c" "${KRB5_SOURCE_DIR}/util/profile/prof_set.c" - "${KRB5_SOURCE_DIR}/util/profile/prof_err.c" "${KRB5_SOURCE_DIR}/util/profile/prof_init.c" + "${KRB5_ET_BIN_DIR}/util/profile/prof_err.c" "${KRB5_SOURCE_DIR}/lib/krb5/krb/fwd_tgt.c" "${KRB5_SOURCE_DIR}/lib/krb5/krb/conv_creds.c" "${KRB5_SOURCE_DIR}/lib/krb5/krb/fast.c" @@ -450,13 +451,12 @@ set(ALL_SRCS - "${KRB5_SOURCE_DIR}/lib/krb5/error_tables/k5e1_err.c" - "${KRB5_SOURCE_DIR}/lib/krb5/error_tables/kdb5_err.c" - "${KRB5_SOURCE_DIR}/lib/krb5/error_tables/asn1_err.c" - "${KRB5_SOURCE_DIR}/lib/krb5/error_tables/krb5_err.c" - "${KRB5_SOURCE_DIR}/lib/krb5/error_tables/krb524_err.c" - "${KRB5_SOURCE_DIR}/lib/krb5/error_tables/kv5m_err.c" - + "${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/k5e1_err.c" + "${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/kdb5_err.c" + "${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/asn1_err.c" + 
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/krb5_err.c" + "${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/krb524_err.c" + "${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/kv5m_err.c" "${KRB5_SOURCE_DIR}/lib/krb5/rcache/rc_base.c" @@ -473,7 +473,7 @@ set(ALL_SRCS ) add_custom_command( - OUTPUT "${KRB5_SOURCE_DIR}/util/et/compile_et" + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/compile_et" COMMAND /bin/sh ./config_script ./compile_et.sh @@ -481,50 +481,17 @@ add_custom_command( ${AWK_PROGRAM} sed > - compile_et + ${CMAKE_CURRENT_BINARY_DIR}/compile_et DEPENDS "${KRB5_SOURCE_DIR}/util/et/compile_et.sh" "${KRB5_SOURCE_DIR}/util/et/config_script" WORKING_DIRECTORY "${KRB5_SOURCE_DIR}/util/et" ) -file(GLOB_RECURSE ET_FILES - "${KRB5_SOURCE_DIR}/*.et" -) - -function(preprocess_et out_var) - set(result) - foreach(in_f ${ARGN}) - string(REPLACE - .et - .c - F_C - ${in_f} - ) - string(REPLACE - .et - .h - F_H - ${in_f} - ) - - get_filename_component(ET_PATH ${in_f} DIRECTORY) - - add_custom_command(OUTPUT ${F_C} ${F_H} - COMMAND perl "${KRB5_SOURCE_DIR}/util/et/compile_et" -d "${KRB5_SOURCE_DIR}/util/et" ${in_f} - DEPENDS ${in_f} "${KRB5_SOURCE_DIR}/util/et/compile_et" - WORKING_DIRECTORY ${ET_PATH} - VERBATIM - ) - list(APPEND result ${F_C}) - endforeach() - set(${out_var} "${result}" PARENT_SCOPE) -endfunction() - add_custom_command( - OUTPUT "${KRB5_SOURCE_DIR}/lib/gssapi/krb5/error_map.h" + OUTPUT "${KRB5_ET_BIN_DIR}/error_map.h" COMMAND perl -I../../../util ../../../util/gen-map.pl - -oerror_map.h + -o${KRB5_ET_BIN_DIR}/error_map.h NAME=gsserrmap KEY=OM_uint32 VALUE=char* @@ -536,22 +503,21 @@ add_custom_command( add_custom_target( ERROR_MAP_H - DEPENDS "${KRB5_SOURCE_DIR}/lib/gssapi/krb5/error_map.h" + DEPENDS "${KRB5_ET_BIN_DIR}/error_map.h" VERBATIM ) add_custom_command( - OUTPUT "${KRB5_SOURCE_DIR}/lib/gssapi/generic/errmap.h" - COMMAND perl -w -I../../../util ../../../util/gen.pl bimap errmap.h NAME=mecherrmap LEFT=OM_uint32 RIGHT=struct\ mecherror LEFTPRINT=print_OM_uint32 RIGHTPRINT=mecherror_print LEFTCMP=cmp_OM_uint32 RIGHTCMP=mecherror_cmp + OUTPUT "${KRB5_ET_BIN_DIR}/errmap.h" + COMMAND perl -w -I../../../util ../../../util/gen.pl bimap ${KRB5_ET_BIN_DIR}/errmap.h NAME=mecherrmap LEFT=OM_uint32 RIGHT=struct\ mecherror LEFTPRINT=print_OM_uint32 RIGHTPRINT=mecherror_print LEFTCMP=cmp_OM_uint32 RIGHTCMP=mecherror_cmp WORKING_DIRECTORY "${KRB5_SOURCE_DIR}/lib/gssapi/generic" ) add_custom_target( ERRMAP_H - DEPENDS "${KRB5_SOURCE_DIR}/lib/gssapi/generic/errmap.h" + DEPENDS "${KRB5_ET_BIN_DIR}/errmap.h" VERBATIM ) - add_custom_target( KRB_5_H DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/include/krb5/krb5.h" @@ -567,7 +533,40 @@ add_dependencies( KRB_5_H ) -preprocess_et(processed_et_files ${ET_FILES}) +# +# Generate error tables +# +function(preprocess_et et_path) + string(REPLACE .et .c F_C ${et_path}) + string(REPLACE .et .h F_H ${et_path}) + get_filename_component(et_dir ${et_path} DIRECTORY) + get_filename_component(et_name ${et_path} NAME_WLE) + + add_custom_command(OUTPUT ${F_C} ${F_H} ${KRB5_ET_BIN_DIR}/${et_name}.h + COMMAND perl "${CMAKE_CURRENT_BINARY_DIR}/compile_et" -d "${KRB5_SOURCE_DIR}/util/et" ${et_path} + # for #include w/o path (via -iquote) + COMMAND ${CMAKE_COMMAND} -E create_symlink ${F_H} ${KRB5_ET_BIN_DIR}/${et_name}.h + DEPENDS ${et_path} "${CMAKE_CURRENT_BINARY_DIR}/compile_et" + WORKING_DIRECTORY ${et_dir} + VERBATIM + ) +endfunction() + +function(generate_error_tables) + file(GLOB_RECURSE ET_FILES "${KRB5_SOURCE_DIR}/*.et") + foreach(et_path ${ET_FILES}) + string(REPLACE ${KRB5_SOURCE_DIR} 
${KRB5_ET_BIN_DIR} et_bin_path ${et_path}) + string(REPLACE / _ et_target_name ${et_path}) + get_filename_component(et_bin_dir ${et_bin_path} DIRECTORY) + add_custom_command(OUTPUT ${et_bin_path} + COMMAND ${CMAKE_COMMAND} -E make_directory ${et_bin_dir} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${et_path} ${et_bin_path} + VERBATIM + ) + preprocess_et(${et_bin_path}) + endforeach() +endfunction() +generate_error_tables() if(CMAKE_SYSTEM_NAME MATCHES "Darwin") add_custom_command( @@ -634,12 +633,12 @@ file(MAKE_DIRECTORY SET(KRBHDEP "${KRB5_SOURCE_DIR}/include/krb5/krb5.hin" - "${KRB5_SOURCE_DIR}/lib/krb5/error_tables/krb5_err.h" - "${KRB5_SOURCE_DIR}/lib/krb5/error_tables/k5e1_err.h" - "${KRB5_SOURCE_DIR}/lib/krb5/error_tables/kdb5_err.h" - "${KRB5_SOURCE_DIR}/lib/krb5/error_tables/kv5m_err.h" - "${KRB5_SOURCE_DIR}/lib/krb5/error_tables/krb524_err.h" - "${KRB5_SOURCE_DIR}/lib/krb5/error_tables/asn1_err.h" + "${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/krb5_err.h" + "${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/k5e1_err.h" + "${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/kdb5_err.h" + "${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/kv5m_err.h" + "${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/krb524_err.h" + "${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/asn1_err.h" ) # cmake < 3.18 does not have 'cat' command @@ -656,6 +655,11 @@ target_include_directories(_krb5 SYSTEM BEFORE PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/include" ) +target_compile_options(_krb5 PRIVATE + # For '#include "file.h"' + -iquote "${CMAKE_CURRENT_BINARY_DIR}/include_private" +) + target_include_directories(_krb5 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/include_private" # For autoconf.h and other generated headers. ${KRB5_SOURCE_DIR} diff --git a/contrib/libcxx b/contrib/libcxx index 61e60294b1d..172b2ae074f 160000 --- a/contrib/libcxx +++ b/contrib/libcxx @@ -1 +1 @@ -Subproject commit 61e60294b1de01483caa9f5d00f437c99b674de6 +Subproject commit 172b2ae074f6755145b91c53a95c8540c1468239 diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index 332fb0411cd..dc9df48b2c1 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -18,12 +18,14 @@ set(SRCS "${LIBCXX_SOURCE_DIR}/src/filesystem/directory_iterator.cpp" "${LIBCXX_SOURCE_DIR}/src/filesystem/int128_builtins.cpp" "${LIBCXX_SOURCE_DIR}/src/filesystem/operations.cpp" +"${LIBCXX_SOURCE_DIR}/src/format.cpp" "${LIBCXX_SOURCE_DIR}/src/functional.cpp" "${LIBCXX_SOURCE_DIR}/src/future.cpp" "${LIBCXX_SOURCE_DIR}/src/hash.cpp" "${LIBCXX_SOURCE_DIR}/src/ios.cpp" "${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp" "${LIBCXX_SOURCE_DIR}/src/iostream.cpp" +"${LIBCXX_SOURCE_DIR}/src/legacy_pointer_safety.cpp" "${LIBCXX_SOURCE_DIR}/src/locale.cpp" "${LIBCXX_SOURCE_DIR}/src/memory.cpp" "${LIBCXX_SOURCE_DIR}/src/mutex.cpp" @@ -33,6 +35,9 @@ set(SRCS "${LIBCXX_SOURCE_DIR}/src/random.cpp" "${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp" "${LIBCXX_SOURCE_DIR}/src/regex.cpp" +"${LIBCXX_SOURCE_DIR}/src/ryu/d2fixed.cpp" +"${LIBCXX_SOURCE_DIR}/src/ryu/d2s.cpp" +"${LIBCXX_SOURCE_DIR}/src/ryu/f2s.cpp" "${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp" "${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp" "${LIBCXX_SOURCE_DIR}/src/string.cpp" @@ -49,7 +54,9 @@ set(SRCS add_library(cxx ${SRCS}) set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake") -target_include_directories(cxx SYSTEM BEFORE PUBLIC $) +target_include_directories(cxx SYSTEM BEFORE PUBLIC + $ + $/src) target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) # 
Enable capturing stack traces for all exceptions. diff --git a/contrib/libcxxabi b/contrib/libcxxabi index df8f1e727db..6eb7cc7a7bd 160000 --- a/contrib/libcxxabi +++ b/contrib/libcxxabi @@ -1 +1 @@ -Subproject commit df8f1e727dbc9e2bedf2282096fa189dc3fe0076 +Subproject commit 6eb7cc7a7bdd779e6734d1b9fb451df2274462d7 diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt index 425111d9b26..bf1ede8a60e 100644 --- a/contrib/libcxxabi-cmake/CMakeLists.txt +++ b/contrib/libcxxabi-cmake/CMakeLists.txt @@ -1,24 +1,24 @@ set(LIBCXXABI_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libcxxabi") set(SRCS -"${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp" "${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp" "${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp" "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp" "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp" "${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp" "${LIBCXXABI_SOURCE_DIR}/src/stdlib_new_delete.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp" ) add_library(cxxabi ${SRCS}) @@ -30,6 +30,7 @@ target_compile_options(cxxabi PRIVATE -w) target_include_directories(cxxabi SYSTEM BEFORE PUBLIC $ PRIVATE $ + PRIVATE $ ) target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY) target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast. diff --git a/contrib/libxml2 b/contrib/libxml2 index 18890f471c4..a075d256fd9 160000 --- a/contrib/libxml2 +++ b/contrib/libxml2 @@ -1 +1 @@ -Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf +Subproject commit a075d256fd9ff15590b86d981b75a50ead124fca diff --git a/contrib/llvm-cmake/CMakeLists.txt b/contrib/llvm-cmake/CMakeLists.txt index 6ff07f0e016..87c8a65510f 100644 --- a/contrib/llvm-cmake/CMakeLists.txt +++ b/contrib/llvm-cmake/CMakeLists.txt @@ -1,12 +1,9 @@ -# During cross-compilation in our CI we have to use llvm-tblgen and other building tools -# tools to be build for host architecture and everything else for target architecture (e.g. AArch64) -# Possible workaround is to use llvm-tblgen from some package... 
-# But lets just enable LLVM for native builds -if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined") - set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) +if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") + set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) else() - set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) + set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) endif() + option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) if (NOT ENABLE_EMBEDDED_COMPILER) diff --git a/contrib/poco b/contrib/poco index 520a90e02e3..008b1646947 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 520a90e02e3e5cb90afeae1846d161dbc508a6f1 +Subproject commit 008b16469471d55b176db181756c94e3f14dd2dc diff --git a/contrib/replxx b/contrib/replxx index 9460e5e0fc1..3fd0e3c9364 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d +Subproject commit 3fd0e3c9364a589447453d9906d854ebd8d385c5 diff --git a/contrib/sysroot b/contrib/sysroot index bbcac834526..e9fb375d0a1 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit bbcac834526d90d1e764164b861be426891d1743 +Subproject commit e9fb375d0a1e5ebfd74c043f088f2342552103f8 diff --git a/contrib/unixodbc b/contrib/unixodbc index b0ad30f7f62..a2cd5395e8c 160000 --- a/contrib/unixodbc +++ b/contrib/unixodbc @@ -1 +1 @@ -Subproject commit b0ad30f7f6289c12b76f04bfb9d466374bb32168 +Subproject commit a2cd5395e8c7f7390025ec93af5bfebef3fb5fcd diff --git a/debian/.gitignore b/debian/.gitignore deleted file mode 100644 index b4432556de7..00000000000 --- a/debian/.gitignore +++ /dev/null @@ -1,18 +0,0 @@ -control -copyright -tmp/ -clickhouse-benchmark/ -clickhouse-client.docs -clickhouse-client/ -clickhouse-common-static-dbg/ -clickhouse-common-static.docs -clickhouse-common-static/ -clickhouse-server-base/ -clickhouse-server-common/ -clickhouse-server/ -debhelper-build-stamp -files -*.debhelper.log -*.debhelper -*.substvars - diff --git a/debian/.pbuilderrc b/debian/.pbuilderrc deleted file mode 100644 index 485906f6198..00000000000 --- a/debian/.pbuilderrc +++ /dev/null @@ -1,223 +0,0 @@ -# -# sudo apt install pbuilder fakeroot debhelper debian-archive-keyring debian-keyring -# -# ubuntu: -# prepare old (trusty or earlier) host system: - -# sudo ln -s gutsy /usr/share/debootstrap/scripts/eoan -# sudo ln -s gutsy /usr/share/debootstrap/scripts/disco -# sudo ln -s gutsy /usr/share/debootstrap/scripts/cosmic -# sudo ln -s gutsy /usr/share/debootstrap/scripts/artful -# sudo ln -s gutsy /usr/share/debootstrap/scripts/bionic -# sudo ln -s sid /usr/share/debootstrap/scripts/buster -# build ubuntu: -# sudo DIST=bionic pbuilder create --configfile debian/.pbuilderrc && DIST=bionic pdebuild --configfile debian/.pbuilderrc -# sudo DIST=cosmic pbuilder create --configfile debian/.pbuilderrc && DIST=cosmic pdebuild --configfile debian/.pbuilderrc -# sudo DIST=disco pbuilder create --configfile debian/.pbuilderrc && DIST=disco pdebuild --configfile debian/.pbuilderrc -# sudo DIST=eoan pbuilder create --configfile debian/.pbuilderrc && DIST=eoan pdebuild --configfile debian/.pbuilderrc -# sudo DIST=devel pbuilder create --configfile debian/.pbuilderrc && DIST=devel pdebuild --configfile debian/.pbuilderrc -# build debian: -# sudo DIST=stable pbuilder create --configfile debian/.pbuilderrc && DIST=stable pdebuild --configfile debian/.pbuilderrc -# sudo DIST=testing pbuilder create --configfile 
debian/.pbuilderrc && DIST=testing pdebuild --configfile debian/.pbuilderrc -# sudo DIST=unstable pbuilder create --configfile debian/.pbuilderrc && DIST=unstable pdebuild --configfile debian/.pbuilderrc -# sudo DIST=experimental pbuilder create --configfile debian/.pbuilderrc && DIST=experimental pdebuild --configfile debian/.pbuilderrc -# build i386 experimental: -# sudo DIST=trusty ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=trusty ARCH=i386 pdebuild --configfile debian/.pbuilderrc -# sudo DIST=xenial ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=xenial ARCH=i386 pdebuild --configfile debian/.pbuilderrc -# sudo DIST=zesty ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=zesty ARCH=i386 pdebuild --configfile debian/.pbuilderrc -# sudo DIST=artful ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=artful ARCH=i386 pdebuild --configfile debian/.pbuilderrc -# sudo DIST=bionic ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=bionic ARCH=i386 pdebuild --configfile debian/.pbuilderrc -# sudo DIST=stable ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=stable ARCH=i386 pdebuild --configfile debian/.pbuilderrc -# sudo DIST=testing ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=testing ARCH=i386 pdebuild --configfile debian/.pbuilderrc -# sudo DIST=experimental ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=experimental ARCH=i386 pdebuild --configfile debian/.pbuilderrc -# test gcc-9 -# env DEB_CC=gcc-9 DEB_CXX=g++-9 EXTRAPACKAGES="g++-9 gcc-9" DIST=disco pdebuild --configfile debian/.pbuilderrc -# use only clang: -# env DEB_CC=clang-8 DEB_CXX=clang++-8 EXTRAPACKAGES=clang-8 DIST=disco pdebuild --configfile debian/.pbuilderrc -# env DEB_CC=clang-5.0 DEB_CXX=clang++-5.0 EXTRAPACKAGES=clang-5.0 DIST=artful pdebuild --configfile debian/.pbuilderrc -# clang+asan: -# env DEB_CC=clang-5.0 DEB_CXX=clang++-5.0 EXTRAPACKAGES="clang-5.0 libc++abi-dev libc++-dev" CMAKE_FLAGS="-DENABLE_TCMALLOC=0 -DENABLE_UNWIND=0 -DCMAKE_BUILD_TYPE=Asan" DIST=artful pdebuild --configfile debian/.pbuilderrc -# clang+tsan: -# env DEB_CC=clang-5.0 DEB_CXX=clang++-5.0 EXTRAPACKAGES="clang-5.0 libc++abi-dev libc++-dev" CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Tsan" DIST=artful pdebuild --configfile debian/.pbuilderrc -# without sse for old systems and some VM: -# env DH_VERBOSE=1 CMAKE_FLAGS="-DHAVE_SSE41=0 -DHAVE_SSE42=0 -DHAVE_POPCNT=0 -DHAVE_SSE2_INTRIN=0 -DSSE2FLAG=' ' -DHAVE_SSE42_INTRIN=0 -DSSE4FLAG=' ' -DHAVE_PCLMULQDQ_INTRIN=0 -DPCLMULFLAG=' '" DIST=artful pdebuild --configfile debian/.pbuilderrc - -# Note: on trusty host creating some future dists can fail (debootstrap error). - -# Your packages built here: /var/cache/pbuilder/*-*/result - -# from https://wiki.debian.org/PbuilderTricks : - -# Codenames for Debian suites according to their alias. Update these when -# needed. -UNSTABLE_CODENAME="sid" -TESTING_CODENAME="buster" -STABLE_CODENAME="stretch" -STABLE_BACKPORTS_SUITE="$STABLE_CODENAME-backports" - -# List of Debian suites. -DEBIAN_SUITES=($UNSTABLE_CODENAME $TESTING_CODENAME $STABLE_CODENAME $STABLE_BACKPORTS_SUITE - "experimental" "unstable" "testing" "stable") - -# List of Ubuntu suites. Update these when needed. -UBUNTU_SUITES=("eoan" "disco" "cosmic" "bionic" "artful" "zesty" "xenial" "trusty" "devel") - -# Set a default distribution if none is used. Note that you can set your own default (i.e. ${DIST:="unstable"}). 
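(Editor's aside on the idiom used throughout the removed `.pbuilderrc` just below this point: the `: ${VAR:=default}` construct assigns a value only when the variable is unset or empty, which is how `DIST` and `ARCH` pick up the host's codename and architecture unless overridden on the command line. A minimal sketch, assuming a Debian-based host with `lsb_release` and `dpkg` available; the final echo is illustrative only:)

```bash
#!/usr/bin/env bash
# Default DIST to the host's codename and ARCH to the host architecture,
# mirroring the removed debian/.pbuilderrc; both can still be overridden,
# e.g. `DIST=bionic ARCH=i386 ./build.sh`.
HOST_DIST=$(lsb_release --short --codename)
: "${DIST:=$HOST_DIST}"
: "${ARCH:=$(dpkg --print-architecture)}"

NAME="$DIST"
if [ -n "$ARCH" ]; then
    NAME="$NAME-$ARCH"
fi
echo "chroot name: $NAME"   # e.g. bionic-amd64
```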
-HOST_DIST=`lsb_release --short --codename` -: ${DIST:="$HOST_DIST"} - -# Optionally change Debian codenames in $DIST to their aliases. -case "$DIST" in - $UNSTABLE_CODENAME) - DIST="unstable" - ;; - $TESTING_CODENAME) - DIST="testing" - ;; - $STABLE_CODENAME) - DIST="stable" - ;; -esac - -# Optionally set the architecture to the host architecture if none set. Note -# that you can set your own default (i.e. ${ARCH:="i386"}). -: ${ARCH:="$(dpkg --print-architecture)"} - -NAME="$DIST" -if [ -n "${ARCH}" ]; then - NAME="$NAME-$ARCH" - DEBOOTSTRAPOPTS=("--arch" "$ARCH" "${DEBOOTSTRAPOPTS[@]}") -fi - -BASETGZ=${SET_BASETGZ} -BASETGZ=${BASETGZ:="/var/cache/pbuilder/$NAME-base.tgz"} -DISTRIBUTION="$DIST" -BUILDRESULT=${SET_BUILDRESULT} -BUILDRESULT=${BUILDRESULT:="/var/cache/pbuilder/$NAME/result/"} -APTCACHE="/var/cache/pbuilder/$NAME/aptcache/" -BUILDPLACE="/var/cache/pbuilder/build/" -ALLOWUNTRUSTED=${SET_ALLOWUNTRUSTED:=${ALLOWUNTRUSTED}} - -#DEBOOTSTRAPOPTS=( '--variant=buildd' $SET_DEBOOTSTRAPOPTS ) - - -if $(echo ${DEBIAN_SUITES[@]} | grep -q $DIST); then - # Debian configuration - OSNAME=debian - MIRRORSITE=${SET_MIRRORSITE="http://deb.debian.org/$OSNAME/"} - COMPONENTS="main contrib non-free" - if $(echo "$STABLE_CODENAME stable" | grep -q $DIST); then - OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $STABLE_BACKPORTS_SUITE $COMPONENTS" - fi - # APTKEYRINGS=/usr/share/keyrings/debian-archive-keyring.gpg - - case "$HOST_DIST" in - "trusty" ) - DEBOOTSTRAPOPTS+=( '--no-check-gpg' ) - ;; - *) - DEBOOTSTRAPOPTS+=( '--keyring' '/usr/share/keyrings/debian-archive-keyring.gpg' ) - # DEBOOTSTRAPOPTS+=( '--keyring' '/usr/share/keyrings/debian-keyring.gpg' ) - esac -elif $(echo ${UBUNTU_SUITES[@]} | grep -q $DIST); then - # Ubuntu configuration - OSNAME=ubuntu - - if [[ "$ARCH" == "amd64" || "$ARCH" == "i386" ]]; then - MIRRORSITE=${SET_MIRRORSITE="http://archive.ubuntu.com/$OSNAME/"} - else - MIRRORSITE=${SET_MIRRORSITE="http://ports.ubuntu.com/ubuntu-ports/"} - fi - - COMPONENTS="main restricted universe multiverse" - - OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $DIST-updates main restricted universe multiverse" - OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $DIST-security main restricted universe multiverse" - OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $DIST-proposed main restricted universe multiverse" - - case "$DIST" in - "trusty" | "xenial" ) - OTHERMIRROR="$OTHERMIRROR | deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/$OSNAME $DIST main" - ALLOWUNTRUSTED=yes - ;; - esac - - # deb http://apt.llvm.org/zesty/ llvm-toolchain-zesty-5.0 main -else - echo "Unknown distribution: $DIST" - exit 1 -fi - -echo "using $NAME $OSNAME $DIST $ARCH $LOGNAME $MIRRORSITE" - -case "$DIST" in - "trusty") - # ccache broken - ;; - *) - CCACHEDIR=${SET_CCACHEDIR:="/var/cache/pbuilder/ccache"} - ;; -esac - -# old systems with default gcc <= 6 -case "$DIST" in - "trusty" | "xenial" | "stable" ) - export DEB_CC=gcc-7 - export DEB_CXX=g++-7 - ;; -esac - -if [ "$ARCH" != arm64 ]; then - case "$DIST" in -# TODO: fix llvm-8 and use for "disco" and "eoan" - "experimental") - EXTRAPACKAGES+=" liblld-8-dev libclang-8-dev llvm-8-dev liblld-8 " - export CMAKE_FLAGS="-DLLVM_VERSION=8 $CMAKE_FLAGS" - ;; - "eoan" | "disco" | "cosmic" | "testing" | "unstable") - EXTRAPACKAGES+=" liblld-7-dev libclang-7-dev llvm-7-dev liblld-7 " - export CMAKE_FLAGS="-DLLVM_VERSION=7 $CMAKE_FLAGS" - ;; - "bionic") - EXTRAPACKAGES+=" liblld-6.0-dev libclang-6.0-dev liblld-6.0 " - export CMAKE_FLAGS="-DLLVM_VERSION=6 $CMAKE_FLAGS" - ;; - "artful" 
) - EXTRAPACKAGES+=" liblld-5.0-dev libclang-5.0-dev liblld-5.0 " - ;; - esac -else - export CMAKE_FLAGS="-DENABLE_EMBEDDED_COMPILER=0 $CMAKE_FLAGS" -fi - -# Will test symbols -#EXTRAPACKAGES+=" gdb " - -# For killall in pbuilder-hooks: -EXTRAPACKAGES+=" psmisc " - -[[ $CCACHE_PREFIX == 'distcc' ]] && EXTRAPACKAGES+=" $CCACHE_PREFIX " && USENETWORK=yes && export DISTCC_DIR=/var/cache/pbuilder/distcc - -[[ $ARCH == 'i386' ]] && EXTRAPACKAGES+=" libssl-dev " - -export DEB_BUILD_OPTIONS=parallel=`nproc` - -# Floating bug with permissions: -[ -n "$CCACHEDIR" ] && sudo mkdir -p $CCACHEDIR -[ -n "$CCACHEDIR" ] && sudo chmod -R a+rwx $CCACHEDIR || true -# chown -R $BUILDUSERID:$BUILDUSERID $CCACHEDIR - - -# Do not create source package inside pbuilder (-b) -# Use current dir to make package (by default should have src archive) -# echo "3.0 (native)" > debian/source/format -# OR -# pdebuild -b --debbuildopts "--source-option=--format=\"3.0 (native)\"" -# OR -DEBBUILDOPTS="-b --source-option=--format=\"3.0 (native)\"" - -HOOKDIR="debian/pbuilder-hooks" - -#echo "DEBOOTSTRAPOPTS=${DEBOOTSTRAPOPTS[@]}" -#echo "ALLOWUNTRUSTED=${ALLOWUNTRUSTED} OTHERMIRROR=${OTHERMIRROR}" -#echo "EXTRAPACKAGES=${EXTRAPACKAGES}" diff --git a/debian/changelog b/debian/changelog deleted file mode 100644 index 43b46f561c8..00000000000 --- a/debian/changelog +++ /dev/null @@ -1,5 +0,0 @@ -clickhouse (22.1.1.1) unstable; urgency=low - - * Modified source code - - -- clickhouse-release Thu, 09 Dec 2021 00:32:58 +0300 diff --git a/debian/changelog.in b/debian/changelog.in deleted file mode 100644 index ce2ee757999..00000000000 --- a/debian/changelog.in +++ /dev/null @@ -1,5 +0,0 @@ -clickhouse (@VERSION_STRING@) unstable; urgency=low - - * Modified source code - - -- @AUTHOR@ <@EMAIL@> @DATE@ diff --git a/debian/clickhouse-client.install b/debian/clickhouse-client.install deleted file mode 100644 index f19f937b318..00000000000 --- a/debian/clickhouse-client.install +++ /dev/null @@ -1,7 +0,0 @@ -usr/bin/clickhouse-client -usr/bin/clickhouse-local -usr/bin/clickhouse-compressor -usr/bin/clickhouse-benchmark -usr/bin/clickhouse-format -usr/bin/clickhouse-obfuscator -etc/clickhouse-client/config.xml diff --git a/debian/clickhouse-common-static.install b/debian/clickhouse-common-static.install deleted file mode 100644 index 087a6dbba8f..00000000000 --- a/debian/clickhouse-common-static.install +++ /dev/null @@ -1,5 +0,0 @@ -usr/bin/clickhouse -usr/bin/clickhouse-odbc-bridge -usr/bin/clickhouse-library-bridge -usr/bin/clickhouse-extract-from-config -usr/share/bash-completion/completions diff --git a/debian/clickhouse-server.cron.d b/debian/clickhouse-server.cron.d deleted file mode 100644 index 1e5d4aab733..00000000000 --- a/debian/clickhouse-server.cron.d +++ /dev/null @@ -1 +0,0 @@ -#*/10 * * * * root ((which service > /dev/null 2>&1 && (service clickhouse-server condstart ||:)) || /etc/init.d/clickhouse-server condstart) > /dev/null 2>&1 diff --git a/debian/clickhouse-server.docs b/debian/clickhouse-server.docs deleted file mode 100644 index e12d6533be2..00000000000 --- a/debian/clickhouse-server.docs +++ /dev/null @@ -1,4 +0,0 @@ -LICENSE -AUTHORS -README.md -CHANGELOG.md diff --git a/debian/clickhouse-server.install b/debian/clickhouse-server.install deleted file mode 100644 index b1475fdf162..00000000000 --- a/debian/clickhouse-server.install +++ /dev/null @@ -1,6 +0,0 @@ -usr/bin/clickhouse-server -usr/bin/clickhouse-copier -usr/bin/clickhouse-report -etc/clickhouse-server/config.xml -etc/clickhouse-server/users.xml 
-etc/systemd/system/clickhouse-server.service diff --git a/debian/control b/debian/control deleted file mode 100644 index c5d98d98f41..00000000000 --- a/debian/control +++ /dev/null @@ -1,58 +0,0 @@ -Source: clickhouse -Section: database -Priority: optional -Maintainer: Alexey Milovidov -Build-Depends: debhelper (>= 9), - cmake | cmake3, - ninja-build, - clang-13, - llvm-13, - lld-13, - libc6-dev, - tzdata -Standards-Version: 3.9.8 - -Package: clickhouse-client -Architecture: all -Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}) -Replaces: clickhouse-compressor -Conflicts: clickhouse-compressor -Description: Client binary for ClickHouse - ClickHouse is a column-oriented database management system - that allows generating analytical data reports in real time. - . - This package provides clickhouse-client , clickhouse-local and clickhouse-benchmark - -Package: clickhouse-common-static -Architecture: any -Depends: ${shlibs:Depends}, ${misc:Depends} -Suggests: clickhouse-common-static-dbg -Replaces: clickhouse-common, clickhouse-server-base -Provides: clickhouse-common, clickhouse-server-base -Description: Common files for ClickHouse - ClickHouse is a column-oriented database management system - that allows generating analytical data reports in real time. - . - This package provides common files for both clickhouse server and client - -Package: clickhouse-server -Architecture: all -Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}), adduser -Recommends: libcap2-bin -Replaces: clickhouse-server-common, clickhouse-server-base -Provides: clickhouse-server-common -Description: Server binary for ClickHouse - ClickHouse is a column-oriented database management system - that allows generating analytical data reports in real time. - . - This package provides clickhouse common configuration files - -Package: clickhouse-common-static-dbg -Architecture: any -Section: debug -Priority: optional -Depends: ${misc:Depends} -Replaces: clickhouse-common-dbg -Conflicts: clickhouse-common-dbg -Description: debugging symbols for clickhouse-common-static - This package contains the debugging symbols for clickhouse-common. diff --git a/debian/rules b/debian/rules deleted file mode 100755 index 414d472c13d..00000000000 --- a/debian/rules +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/make -f -# -*- makefile -*- - -# Uncomment this to turn on verbose mode. -export DH_VERBOSE=1 - -# -pie only for static mode -export DEB_BUILD_MAINT_OPTIONS=hardening=-all - -# because copy_headers.sh have hardcoded path to build/include_directories.txt -BUILDDIR = obj-$(DEB_HOST_GNU_TYPE) -CURDIR = $(shell pwd) -DESTDIR = $(CURDIR)/debian/tmp - -DEB_HOST_MULTIARCH ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH) - -ifeq ($(CCACHE_PREFIX),distcc) - THREADS_COUNT=$(shell distcc -j) -endif -ifeq ($(THREADS_COUNT),) - THREADS_COUNT=$(shell nproc || grep -c ^processor /proc/cpuinfo || sysctl -n hw.ncpu || echo 4) -endif -DEB_BUILD_OPTIONS+=parallel=$(THREADS_COUNT) - -ifndef ENABLE_TESTS - CMAKE_FLAGS += -DENABLE_TESTS=0 -else -# To export binaries and from deb build we do not strip them. 
No need to run tests in deb build as we run them in CI - DEB_BUILD_OPTIONS+= nocheck - DEB_BUILD_OPTIONS+= nostrip -endif - -ifndef MAKE_TARGET - MAKE_TARGET = clickhouse-bundle -endif - -CMAKE_FLAGS += -DENABLE_UTILS=0 - -DEB_CC ?= $(shell which gcc-11 gcc-10 gcc-9 gcc | head -n1) -DEB_CXX ?= $(shell which g++-11 g++-10 g++-9 g++ | head -n1) - -ifdef DEB_CXX - DEB_BUILD_GNU_TYPE := $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) - DEB_HOST_GNU_TYPE := $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) -ifeq ($(DEB_BUILD_GNU_TYPE),$(DEB_HOST_GNU_TYPE)) - CC := $(DEB_CC) - CXX := $(DEB_CXX) -else ifeq (clang,$(findstring clang,$(DEB_CXX))) -# If we crosscompile with clang, it knows what to do - CC := $(DEB_CC) - CXX := $(DEB_CXX) -else - CC := $(DEB_HOST_GNU_TYPE)-$(DEB_CC) - CXX := $(DEB_HOST_GNU_TYPE)-$(DEB_CXX) -endif -endif - -ifdef CXX - CMAKE_FLAGS += -DCMAKE_CXX_COMPILER=`which $(CXX)` -endif -ifdef CC - CMAKE_FLAGS += -DCMAKE_C_COMPILER=`which $(CC)` -endif - -ifndef DISABLE_NINJA - NINJA=$(shell which ninja) -ifneq ($(NINJA),) - CMAKE_FLAGS += -GNinja - export MAKE=$(NINJA) $(NINJA_FLAGS) -endif -endif - -ifndef DH_VERBOSE - CMAKE_FLAGS += -DCMAKE_VERBOSE_MAKEFILE=0 -endif - -# Useful for bulding on low memory systems -ifndef DISABLE_PARALLEL - DH_FLAGS += --parallel -else - THREADS_COUNT = 1 -endif - -%: - dh $@ $(DH_FLAGS) --buildsystem=cmake - -override_dh_auto_configure: - dh_auto_configure -- $(CMAKE_FLAGS) - -override_dh_auto_build: - # Fix for ninja. Do not add -O. - $(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET) - -override_dh_auto_test: -ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS))) - cd $(BUILDDIR) && ctest -j$(THREADS_COUNT) -V -endif - -# Disable config.guess and config.sub update -override_dh_update_autotools_config: - -override_dh_clean: - rm -rf debian/copyright debian/clickhouse-client.docs debian/clickhouse-common-static.docs - dh_clean # -X contrib - -override_dh_strip: - #https://www.debian.org/doc/debian-policy/ch-source.html#debian-rules-and-deb-build-options -ifeq (,$(filter nostrip,$(DEB_BUILD_OPTIONS))) - dh_strip -pclickhouse-common-static --dbg-package=clickhouse-common-static-dbg -endif - -override_dh_install: - # Making docs - cp LICENSE debian/copyright - - ln -sf clickhouse-server.docs debian/clickhouse-client.docs - ln -sf clickhouse-server.docs debian/clickhouse-common-static.docs - - # systemd compatibility - mkdir -p $(DESTDIR)/etc/systemd/system/ - cp debian/clickhouse-server.service $(DESTDIR)/etc/systemd/system/ - - dh_install --list-missing --sourcedir=$(DESTDIR) - -override_dh_auto_install: - env DESTDIR=$(DESTDIR) $(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) install - -override_dh_shlibdeps: - true # We depend only on libc and dh_shlibdeps gives us wrong (too strict) dependency. - -override_dh_builddeb: - dh_builddeb -- -Z gzip # Older systems don't have "xz", so use "gzip" instead. 
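(The removed `debian/rules` above derives its build parallelism through a chain of fallbacks: `distcc -j` when distcc fronts the compiler cache, otherwise `nproc`, `/proc/cpuinfo`, `sysctl`, and finally a constant. The same pattern is easy to reuse in plain shell; this is a sketch, not part of the patch:)

```bash
#!/usr/bin/env bash
# Pick a thread count the way the removed debian/rules does: ask distcc
# when it fronts the cache, otherwise fall back through local CPU
# detection, ending at a constant of 4.
if [ "${CCACHE_PREFIX:-}" = "distcc" ]; then
    THREADS_COUNT=$(distcc -j)
fi
if [ -z "${THREADS_COUNT:-}" ]; then
    THREADS_COUNT=$(nproc || grep -c ^processor /proc/cpuinfo || sysctl -n hw.ncpu || echo 4)
fi
echo "building with -j${THREADS_COUNT}"
```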
diff --git a/debian/source/format b/debian/source/format deleted file mode 100644 index 163aaf8d82b..00000000000 --- a/debian/source/format +++ /dev/null @@ -1 +0,0 @@ -3.0 (quilt) diff --git a/debian/source/options b/debian/source/options deleted file mode 100644 index 0ceb3b9e28b..00000000000 --- a/debian/source/options +++ /dev/null @@ -1,9 +0,0 @@ -tar-ignore -tar-ignore="build_*/*" -tar-ignore="workspace/*" -tar-ignore="contrib/poco/openssl/*" -tar-ignore="contrib/poco/gradle/*" -tar-ignore="contrib/poco/Data/SQLite/*" -tar-ignore="contrib/poco/PDF/*" -compression-level=3 -compression=gzip diff --git a/debian/watch b/debian/watch deleted file mode 100644 index ed3cab97ade..00000000000 --- a/debian/watch +++ /dev/null @@ -1,6 +0,0 @@ -version=4 - -opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)-stable\.tar\.gz%clickhouse-$1.tar.gz%" \ - https://github.com/ClickHouse/ClickHouse/tags \ - (?:.*?/)?v?(\d[\d.]*)-stable\.tar\.gz debian uupdate - diff --git a/docker/docs/builder/Dockerfile b/docker/docs/builder/Dockerfile index 906312a19a2..061251aa7f0 100644 --- a/docker/docs/builder/Dockerfile +++ b/docker/docs/builder/Dockerfile @@ -1,4 +1,3 @@ -# rebuild in #33610 # docker build -t clickhouse/docs-builder . FROM ubuntu:20.04 diff --git a/docker/docs/check/Dockerfile b/docker/docs/check/Dockerfile index 174be123eed..4eb03a91e7a 100644 --- a/docker/docs/check/Dockerfile +++ b/docker/docs/check/Dockerfile @@ -1,4 +1,3 @@ -# rebuild in #33610 # docker build -t clickhouse/docs-check . ARG FROM_TAG=latest FROM clickhouse/docs-builder:$FROM_TAG diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile new file mode 100644 index 00000000000..068377e8f8c --- /dev/null +++ b/docker/keeper/Dockerfile @@ -0,0 +1,74 @@ +FROM ubuntu:20.04 AS glibc-donor + +ARG TARGETARCH +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && ln -s "${rarch}-linux-gnu" /lib/linux-gnu + + +FROM alpine + +ENV LANG=en_US.UTF-8 \ + LANGUAGE=en_US:en \ + LC_ALL=en_US.UTF-8 \ + TZ=UTC \ + CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml + +COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/ +COPY --from=glibc-donor /etc/nsswitch.conf /etc/ +COPY entrypoint.sh /entrypoint.sh + +ARG TARGETARCH +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \ + arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \ + esac + +ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" +ARG VERSION="22.4.1.917" +ARG PACKAGES="clickhouse-keeper" + +# user/group precreated explicitly with fixed uid/gid on purpose. +# It is especially important for rootless containers: in that case entrypoint +# can't do chown and owners of mounted volumes should be configured externally. +# We do that in advance at the begining of Dockerfile before any packages will be +# installed to prevent picking those uid / gid by some unrelated software. +# The same uid / gid (101) is used both for alpine and ubuntu. 
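(To make the uid/gid comment in the new keeper Dockerfile concrete: the image pins the `clickhouse` account to uid/gid 101 before anything else is installed, so mounted volumes can be chowned outside the container in rootless setups. A standalone sketch of the same commands, taken from the RUN step that follows in the Dockerfile; adduser/addgroup here are the busybox variants:)

```bash
# Pre-create the clickhouse user/group with fixed ids (101) so rootless
# deployments can pre-chown mounted volumes ahead of time.
addgroup -S -g 101 clickhouse
adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse \
        -g "ClickHouse keeper" -u 101 clickhouse
mkdir -p /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper
chown clickhouse:clickhouse /var/lib/clickhouse
chown root:clickhouse /var/log/clickhouse-keeper
```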
+ + +ARG TARGETARCH +RUN arch=${TARGETARCH:-amd64} \ + && for package in ${PACKAGES}; do \ + { \ + { echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \ + && wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \ + && tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \ + } || \ + { echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \ + && wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \ + && tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \ + } ; \ + } || exit 1 \ + ; done \ + && rm /tmp/*.tgz /install -r \ + && addgroup -S -g 101 clickhouse \ + && adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse keeper" -u 101 clickhouse \ + && mkdir -p /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper \ + && chown clickhouse:clickhouse /var/lib/clickhouse \ + && chown root:clickhouse /var/log/clickhouse-keeper \ + && chmod +x /entrypoint.sh \ + && apk add --no-cache su-exec bash tzdata \ + && cp /usr/share/zoneinfo/UTC /etc/localtime \ + && echo "UTC" > /etc/timezone \ + && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper + + +EXPOSE 2181 10181 44444 + +VOLUME /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/keeper/Dockerfile.alpine b/docker/keeper/Dockerfile.alpine new file mode 120000 index 00000000000..1d1fe94df49 --- /dev/null +++ b/docker/keeper/Dockerfile.alpine @@ -0,0 +1 @@ +Dockerfile \ No newline at end of file diff --git a/docker/keeper/entrypoint.sh b/docker/keeper/entrypoint.sh new file mode 100644 index 00000000000..3aacf655c28 --- /dev/null +++ b/docker/keeper/entrypoint.sh @@ -0,0 +1,93 @@ +#!/bin/bash + +set +x +set -eo pipefail +shopt -s nullglob + +DO_CHOWN=1 +if [ "${CLICKHOUSE_DO_NOT_CHOWN:-0}" = "1" ]; then + DO_CHOWN=0 +fi + +CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}" +CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}" + +# support --user +if [ "$(id -u)" = "0" ]; then + USER=$CLICKHOUSE_UID + GROUP=$CLICKHOUSE_GID + if command -v gosu &> /dev/null; then + gosu="gosu $USER:$GROUP" + elif command -v su-exec &> /dev/null; then + gosu="su-exec $USER:$GROUP" + else + echo "No gosu/su-exec detected!" + exit 1 + fi +else + USER="$(id -u)" + GROUP="$(id -g)" + gosu="" + DO_CHOWN=0 +fi + +KEEPER_CONFIG="${KEEPER_CONFIG:-/etc/clickhouse-keeper/config.yaml}" + +if [ -f "$KEEPER_CONFIG" ] && ! $gosu test -f "$KEEPER_CONFIG" -a -r "$KEEPER_CONFIG"; then + echo "Configuration file '$KEEPER_CONFIG' isn't readable by user with id '$USER'" + exit 1 +fi + +DATA_DIR="${CLICKHOUSE_DATA_DIR:-/var/lib/clickhouse}" +LOG_DIR="${LOG_DIR:-/var/log/clickhouse-keeper}" +LOG_PATH="${LOG_DIR}/clickhouse-keeper.log" +ERROR_LOG_PATH="${LOG_DIR}/clickhouse-keeper.err.log" +COORDINATION_LOG_DIR="${DATA_DIR}/coordination/log" +COORDINATION_SNAPSHOT_DIR="${DATA_DIR}/coordination/snapshots" +CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0} + +for dir in "$DATA_DIR" \ + "$LOG_DIR" \ + "$TMP_DIR" \ + "$COORDINATION_LOG_DIR" \ + "$COORDINATION_SNAPSHOT_DIR" +do + # check if variable not empty + [ -z "$dir" ] && continue + # ensure directories exist + if ! 
mkdir -p "$dir"; then + echo "Couldn't create necessary directory: $dir" + exit 1 + fi + + if [ "$DO_CHOWN" = "1" ]; then + # ensure proper directories permissions + # but skip it for if directory already has proper premissions, cause recursive chown may be slow + if [ "$(stat -c %u "$dir")" != "$USER" ] || [ "$(stat -c %g "$dir")" != "$GROUP" ]; then + chown -R "$USER:$GROUP" "$dir" + fi + elif ! $gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then + echo "Necessary directory '$dir' isn't accessible by user with id '$USER'" + exit 1 + fi +done + +# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments +if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then + # Watchdog is launched by default, but does not send SIGINT to the main process, + # so the container can't be finished by ctrl+c + export CLICKHOUSE_WATCHDOG_ENABLE + + cd /var/lib/clickhouse + + # There is a config file. It is already tested with gosu (if it is readably by keeper user) + if [ -f "$KEEPER_CONFIG" ]; then + exec $gosu /usr/bin/clickhouse-keeper --config-file="$KEEPER_CONFIG" --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@" + fi + + # There is no config file. Will use embedded one + exec $gosu /usr/bin/clickhouse-keeper --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@" +fi + +# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image +exec "$@" diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index e3e2e689b17..a57a734e3df 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -95,6 +95,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ && apt-get install gcc-11 g++-11 --yes \ && apt-get clean +# Architecture of the image when BuildKit/buildx is used +ARG TARGETARCH +ARG NFPM_VERSION=2.15.0 + +RUN arch=${TARGETARCH:-amd64} \ + && curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \ + && dpkg -i /tmp/nfpm.deb \ + && rm /tmp/nfpm.deb COPY build.sh / -CMD ["bash", "-c", "/build.sh 2>&1 | ts"] +CMD ["bash", "-c", "/build.sh 2>&1"] diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 2f18b07ffe1..2bedb50dd40 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -1,7 +1,13 @@ #!/usr/bin/env bash +exec &> >(ts) set -x -e +cache_status () { + ccache --show-config ||: + ccache --show-stats ||: +} + mkdir -p build/cmake/toolchain/darwin-x86_64 tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1 ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64 @@ -19,15 +25,33 @@ read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" env cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. 
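(One detail of the keeper entrypoint above worth calling out: it only runs a recursive chown when the directory's current owner or group differs from the target ids, because recursing over a large data volume can be very slow. Extracted into a standalone helper as a sketch; the numeric ids and the example path are the keeper defaults, and `ensure_owned` is a hypothetical name:)

```bash
ensure_owned() {
    # Create a directory and chown it recursively only when its current
    # owner or group differs from the target numeric ids; this mirrors
    # docker/keeper/entrypoint.sh and avoids slow chowns on big volumes.
    local dir="$1" uid="$2" gid="$3"
    if ! mkdir -p "$dir"; then
        echo "Couldn't create necessary directory: $dir"
        return 1
    fi
    if [ "$(stat -c %u "$dir")" != "$uid" ] || [ "$(stat -c %g "$dir")" != "$gid" ]; then
        chown -R "$uid:$gid" "$dir"
    fi
}

# Hypothetical call with the keeper defaults:
# ensure_owned /var/lib/clickhouse 101 101
```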
-ccache --show-config ||: -ccache --show-stats ||: +if [ "coverity" == "$COMBINED_OUTPUT" ] +then + mkdir -p /opt/cov-analysis + + wget --post-data "token=$COVERITY_TOKEN&project=ClickHouse%2FClickHouse" -qO- https://scan.coverity.com/download/linux64 | tar xz -C /opt/cov-analysis --strip-components 1 + export PATH=$PATH:/opt/cov-analysis/bin + cov-configure --config ./coverity.config --template --comptype clangcc --compiler "$CC" + SCAN_WRAPPER="cov-build --config ./coverity.config --dir cov-int" +fi + +cache_status +# clear cache stats ccache --zero-stats ||: +# No quotes because I want it to expand to nothing if empty. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. -ninja $NINJA_FLAGS clickhouse-bundle +$SCAN_WRAPPER ninja $NINJA_FLAGS clickhouse-bundle -ccache --show-config ||: -ccache --show-stats ||: +cache_status + +if [ -n "$MAKE_DEB" ]; then + rm -rf /build/packages/root + # No quotes because I want it to expand to nothing if empty. + # shellcheck disable=SC2086 + DESTDIR=/build/packages/root ninja $NINJA_FLAGS install + bash -x /build/packages/build +fi mv ./programs/clickhouse* /output mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds @@ -77,6 +101,12 @@ then mv "$COMBINED_OUTPUT.tgz" /output fi +if [ "coverity" == "$COMBINED_OUTPUT" ] +then + tar -cv -I pigz -f "coverity-scan.tgz" cov-int + mv "coverity-scan.tgz" /output +fi + # Also build fuzzers if any sanitizer specified # if [ -n "$SANITIZER" ] # then @@ -84,8 +114,7 @@ fi # ../docker/packager/other/fuzzer.sh # fi -ccache --show-config ||: -ccache --show-stats ||: +cache_status if [ "${CCACHE_DEBUG:-}" == "1" ] then diff --git a/docker/packager/packager b/docker/packager/packager index 05b2e02df96..9a72a16bd70 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -#-*- coding: utf-8 -*- +# -*- coding: utf-8 -*- import subprocess import os import argparse @@ -8,36 +8,39 @@ import sys SCRIPT_PATH = os.path.realpath(__file__) -IMAGE_MAP = { - "deb": "clickhouse/deb-builder", - "binary": "clickhouse/binary-builder", -} def check_image_exists_locally(image_name): try: - output = subprocess.check_output("docker images -q {} 2> /dev/null".format(image_name), shell=True) + output = subprocess.check_output( + f"docker images -q {image_name} 2> /dev/null", shell=True + ) return output != "" - except subprocess.CalledProcessError as ex: + except subprocess.CalledProcessError: return False + def pull_image(image_name): try: - subprocess.check_call("docker pull {}".format(image_name), shell=True) + subprocess.check_call(f"docker pull {image_name}", shell=True) return True - except subprocess.CalledProcessError as ex: - logging.info("Cannot pull image {}".format(image_name)) + except subprocess.CalledProcessError: + logging.info(f"Cannot pull image {image_name}".format()) return False + def build_image(image_name, filepath): context = os.path.dirname(filepath) - build_cmd = "docker build --network=host -t {} -f {} {}".format(image_name, filepath, context) - logging.info("Will build image with cmd: '{}'".format(build_cmd)) + build_cmd = f"docker build --network=host -t {image_name} -f {filepath} {context}" + logging.info("Will build image with cmd: '%s'", build_cmd) subprocess.check_call( build_cmd, shell=True, ) -def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir, docker_image_version): + +def run_docker_image_with_env( + image_name, output, env_variables, ch_root, ccache_dir, 
docker_image_version +): env_part = " -e ".join(env_variables) if env_part: env_part = " -e " + env_part @@ -47,28 +50,52 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache else: interactive = "" - cmd = "docker run --network=host --rm --volume={output_path}:/output --volume={ch_root}:/build --volume={ccache_dir}:/ccache {env} {interactive} {img_name}".format( - output_path=output, - ch_root=ch_root, - ccache_dir=ccache_dir, - env=env_part, - img_name=image_name + ":" + docker_image_version, - interactive=interactive + cmd = ( + f"docker run --network=host --rm --volume={output}:/output " + f"--volume={ch_root}:/build --volume={ccache_dir}:/ccache {env_part} " + f"{interactive} {image_name}:{docker_image_version}" ) - logging.info("Will build ClickHouse pkg with cmd: '{}'".format(cmd)) + logging.info("Will build ClickHouse pkg with cmd: '%s'", cmd) subprocess.check_call(cmd, shell=True) -def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries): + +def is_release_build(build_type, package_type, sanitizer, split_binary): + return ( + build_type == "" + and package_type == "deb" + and sanitizer == "" + and not split_binary + ) + + +def parse_env_variables( + build_type, + compiler, + sanitizer, + package_type, + image_type, + cache, + distcc_hosts, + split_binary, + clang_tidy, + version, + author, + official, + additional_pkgs, + with_coverage, + with_binaries, +): DARWIN_SUFFIX = "-darwin" DARWIN_ARM_SUFFIX = "-darwin-aarch64" ARM_SUFFIX = "-aarch64" FREEBSD_SUFFIX = "-freebsd" - PPC_SUFFIX = '-ppc64le' + PPC_SUFFIX = "-ppc64le" result = [] - cmake_flags = ['$CMAKE_FLAGS'] + result.append("OUTPUT_DIR=/output") + cmake_flags = ["$CMAKE_FLAGS"] is_cross_darwin = compiler.endswith(DARWIN_SUFFIX) is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX) @@ -77,61 +104,93 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) if is_cross_darwin: - cc = compiler[:-len(DARWIN_SUFFIX)] + cc = compiler[: -len(DARWIN_SUFFIX)] cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/x86_64-apple-darwin-ar") - cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/x86_64-apple-darwin-install_name_tool") - cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib") + cmake_flags.append( + "-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/" + "x86_64-apple-darwin-install_name_tool" + ) + cmake_flags.append( + "-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib" + ) cmake_flags.append("-DLINKER_NAME=/cctools/bin/x86_64-apple-darwin-ld") - cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake") + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake" + ) elif is_cross_darwin_arm: - cc = compiler[:-len(DARWIN_ARM_SUFFIX)] + cc = compiler[: -len(DARWIN_ARM_SUFFIX)] cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar") - cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/aarch64-apple-darwin-install_name_tool") - cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib") + cmake_flags.append( + "-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/" + "aarch64-apple-darwin-install_name_tool" + ) + cmake_flags.append( + "-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib" + ) 
cmake_flags.append("-DLINKER_NAME=/cctools/bin/aarch64-apple-darwin-ld") - cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake") + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake" + ) elif is_cross_arm: - cc = compiler[:-len(ARM_SUFFIX)] - cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake") - result.append("DEB_ARCH_FLAG=-aarm64") + cc = compiler[: -len(ARM_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake" + ) + result.append("DEB_ARCH=arm64") elif is_cross_freebsd: - cc = compiler[:-len(FREEBSD_SUFFIX)] - cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake") + cc = compiler[: -len(FREEBSD_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake" + ) elif is_cross_ppc: - cc = compiler[:-len(PPC_SUFFIX)] - cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake") + cc = compiler[: -len(PPC_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake" + ) else: cc = compiler - result.append("DEB_ARCH_FLAG=-aamd64") + result.append("DEB_ARCH=amd64") - cxx = cc.replace('gcc', 'g++').replace('clang', 'clang++') + cxx = cc.replace("gcc", "g++").replace("clang", "clang++") if image_type == "deb": - result.append("DEB_CC={}".format(cc)) - result.append("DEB_CXX={}".format(cxx)) - # For building fuzzers - result.append("CC={}".format(cc)) - result.append("CXX={}".format(cxx)) - elif image_type == "binary": - result.append("CC={}".format(cc)) - result.append("CXX={}".format(cxx)) - cmake_flags.append('-DCMAKE_C_COMPILER=`which {}`'.format(cc)) - cmake_flags.append('-DCMAKE_CXX_COMPILER=`which {}`'.format(cxx)) + result.append("MAKE_DEB=true") + cmake_flags.append("-DENABLE_TESTS=0") + cmake_flags.append("-DENABLE_UTILS=0") + cmake_flags.append("-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON") + cmake_flags.append("-DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON") + cmake_flags.append("-DCMAKE_AUTOGEN_VERBOSE=ON") + cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr") + cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc") + cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var") + cmake_flags.append("-DBUILD_STANDALONE_KEEPER=ON") + if is_release_build(build_type, package_type, sanitizer, split_binary): + cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON") + + result.append(f"CC={cc}") + result.append(f"CXX={cxx}") + cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}") + cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}") # Create combined output archive for split build and for performance tests. 
if package_type == "performance": result.append("COMBINED_OUTPUT=performance") cmake_flags.append("-DENABLE_TESTS=0") + elif package_type == "coverity": + result.append("COMBINED_OUTPUT=coverity") + result.append('COVERITY_TOKEN="$COVERITY_TOKEN"') elif split_binary: result.append("COMBINED_OUTPUT=shared_build") if sanitizer: - result.append("SANITIZER={}".format(sanitizer)) + result.append(f"SANITIZER={sanitizer}") if build_type: - result.append("BUILD_TYPE={}".format(build_type)) + result.append(f"BUILD_TYPE={build_type.capitalize()}") + else: + result.append("BUILD_TYPE=None") - if cache == 'distcc': - result.append("CCACHE_PREFIX={}".format(cache)) + if cache == "distcc": + result.append(f"CCACHE_PREFIX={cache}") if cache: result.append("CCACHE_DIR=/ccache") @@ -142,109 +201,191 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ # result.append("CCACHE_UMASK=777") if distcc_hosts: - hosts_with_params = ["{}/24,lzo".format(host) for host in distcc_hosts] + ["localhost/`nproc`"] - result.append('DISTCC_HOSTS="{}"'.format(" ".join(hosts_with_params))) + hosts_with_params = [f"{host}/24,lzo" for host in distcc_hosts] + [ + "localhost/`nproc`" + ] + result.append('DISTCC_HOSTS="' + " ".join(hosts_with_params) + '"') elif cache == "distcc": - result.append('DISTCC_HOSTS="{}"'.format("localhost/`nproc`")) + result.append('DISTCC_HOSTS="localhost/`nproc`"') - if alien_pkgs: - result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'") + if additional_pkgs: + result.append("MAKE_APK=true") + result.append("MAKE_RPM=true") + result.append("MAKE_TGZ=true") if with_binaries == "programs": - result.append('BINARY_OUTPUT=programs') + result.append("BINARY_OUTPUT=programs") elif with_binaries == "tests": - result.append('ENABLE_TESTS=1') - result.append('BINARY_OUTPUT=tests') - cmake_flags.append('-DENABLE_TESTS=1') + result.append("ENABLE_TESTS=1") + result.append("BINARY_OUTPUT=tests") + cmake_flags.append("-DENABLE_TESTS=1") if split_binary: - cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1') + cmake_flags.append( + "-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 " + "-DCLICKHOUSE_SPLIT_BINARY=1" + ) # We can't always build utils because it requires too much space, but # we have to build them at least in some way in CI. The split build is # probably the least heavy disk-wise. - cmake_flags.append('-DENABLE_UTILS=1') + cmake_flags.append("-DENABLE_UTILS=1") if clang_tidy: - cmake_flags.append('-DENABLE_CLANG_TIDY=1') - cmake_flags.append('-DENABLE_UTILS=1') - cmake_flags.append('-DENABLE_TESTS=1') - cmake_flags.append('-DENABLE_EXAMPLES=1') + cmake_flags.append("-DENABLE_CLANG_TIDY=1") + cmake_flags.append("-DENABLE_UTILS=1") + cmake_flags.append("-DENABLE_TESTS=1") + cmake_flags.append("-DENABLE_EXAMPLES=1") # Don't stop on first error to find more clang-tidy errors in one run. 
- result.append('NINJA_FLAGS=-k0') + result.append("NINJA_FLAGS=-k0") if with_coverage: - cmake_flags.append('-DWITH_COVERAGE=1') + cmake_flags.append("-DWITH_COVERAGE=1") if version: - result.append("VERSION_STRING='{}'".format(version)) + result.append(f"VERSION_STRING='{version}'") if author: - result.append("AUTHOR='{}'".format(author)) + result.append(f"AUTHOR='{author}'") if official: - cmake_flags.append('-DYANDEX_OFFICIAL_BUILD=1') + cmake_flags.append("-DCLICKHOUSE_OFFICIAL_BUILD=1") - result.append('CMAKE_FLAGS="' + ' '.join(cmake_flags) + '"') + result.append('CMAKE_FLAGS="' + " ".join(cmake_flags) + '"') return result + if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="ClickHouse building script using prebuilt Docker image") - # 'performance' creates a combined .tgz with server and configs to be used for performance test. - parser.add_argument("--package-type", choices=['deb', 'binary', 'performance'], required=True) - parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)) + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="ClickHouse building script using prebuilt Docker image", + ) + # 'performance' creates a combined .tgz with server + # and configs to be used for performance test. + parser.add_argument( + "--package-type", + choices=["deb", "binary", "performance", "coverity"], + required=True, + ) + parser.add_argument( + "--clickhouse-repo-path", + default=os.path.join( + os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir + ), + help="ClickHouse git repository", + ) parser.add_argument("--output-dir", required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") - parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64", - "clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64", - "clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64", "clang-13-ppc64le", - "clang-11-freebsd", "clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13") - parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") + parser.add_argument( + "--compiler", + choices=( + "clang-11", + "clang-11-darwin", + "clang-11-darwin-aarch64", + "clang-11-aarch64", + "clang-12", + "clang-12-darwin", + "clang-12-darwin-aarch64", + "clang-12-aarch64", + "clang-13", + "clang-13-darwin", + "clang-13-darwin-aarch64", + "clang-13-aarch64", + "clang-13-ppc64le", + "clang-11-freebsd", + "clang-12-freebsd", + "clang-13-freebsd", + "gcc-11", + ), + default="clang-13", + help="a compiler to use", + ) + parser.add_argument( + "--sanitizer", + choices=("address", "thread", "memory", "undefined", ""), + default="", + ) parser.add_argument("--split-binary", action="store_true") parser.add_argument("--clang-tidy", action="store_true") - parser.add_argument("--cache", choices=("", "ccache", "distcc"), default="") - parser.add_argument("--ccache_dir", default= os.getenv("HOME", "") + '/.ccache') + parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="") + parser.add_argument( + "--ccache_dir", + default=os.getenv("HOME", "") + "/.ccache", + help="a directory with ccache", + ) 
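(For orientation, the reworked command-line interface defined by the argparse calls around here is typically driven from a shell roughly as follows. This is an illustrative invocation only, using flags that appear in the parser above and below; the concrete values depend on the build being produced:)

```bash
# Hypothetical CI-style invocation of the packager script; flag values
# are examples, not the only supported combination.
./docker/packager/packager \
    --package-type deb \
    --compiler clang-13 \
    --output-dir ./packager-output \
    --cache ccache \
    --ccache_dir "$HOME/.ccache" \
    --docker-image-version latest
```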
parser.add_argument("--distcc-hosts", nargs="+") parser.add_argument("--force-build-image", action="store_true") parser.add_argument("--version") - parser.add_argument("--author", default="clickhouse") + parser.add_argument("--author", default="clickhouse", help="a package author") parser.add_argument("--official", action="store_true") - parser.add_argument("--alien-pkgs", nargs='+', default=[]) + parser.add_argument("--additional-pkgs", action="store_true") parser.add_argument("--with-coverage", action="store_true") - parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="") - parser.add_argument("--docker-image-version", default="latest") + parser.add_argument( + "--with-binaries", choices=("programs", "tests", ""), default="" + ) + parser.add_argument( + "--docker-image-version", default="latest", help="docker image tag to use" + ) args = parser.parse_args() if not os.path.isabs(args.output_dir): args.output_dir = os.path.abspath(os.path.join(os.getcwd(), args.output_dir)) - image_type = 'binary' if args.package_type == 'performance' else args.package_type - image_name = IMAGE_MAP[image_type] + image_type = ( + "binary" + if args.package_type in ("performance", "coverity") + else args.package_type + ) + image_name = "clickhouse/binary-builder" if not os.path.isabs(args.clickhouse_repo_path): ch_root = os.path.abspath(os.path.join(os.getcwd(), args.clickhouse_repo_path)) else: ch_root = args.clickhouse_repo_path - if args.alien_pkgs and not image_type == "deb": - raise Exception("Can add alien packages only in deb build") + if args.additional_pkgs and image_type != "deb": + raise Exception("Can build additional packages only in deb build") - if args.with_binaries != "" and not image_type == "deb": + if args.with_binaries != "" and image_type != "deb": raise Exception("Can add additional binaries only in deb build") if args.with_binaries != "" and image_type == "deb": - logging.info("Should place {} to output".format(args.with_binaries)) + logging.info("Should place %s to output", args.with_binaries) dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile") image_with_version = image_name + ":" + args.docker_image_version - if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image: + if ( + image_type != "freebsd" + and not check_image_exists_locally(image_name) + or args.force_build_image + ): if not pull_image(image_with_version) or args.force_build_image: build_image(image_with_version, dockerfile) env_prepared = parse_env_variables( - args.build_type, args.compiler, args.sanitizer, args.package_type, image_type, - args.cache, args.distcc_hosts, args.split_binary, args.clang_tidy, - args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries) + args.build_type, + args.compiler, + args.sanitizer, + args.package_type, + image_type, + args.cache, + args.distcc_hosts, + args.split_binary, + args.clang_tidy, + args.version, + args.author, + args.official, + args.additional_pkgs, + args.with_coverage, + args.with_binaries, + ) - run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir, args.docker_image_version) - logging.info("Output placed into {}".format(args.output_dir)) + run_docker_image_with_env( + image_name, + args.output_dir, + env_prepared, + ch_root, + args.ccache_dir, + args.docker_image_version, + ) + logging.info("Output placed into %s", args.output_dir) diff --git a/docker/server/.gitignore 
b/docker/server/.gitignore deleted file mode 100644 index 692758d55aa..00000000000 --- a/docker/server/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -alpine-root/* -tgz-packages/* diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile deleted file mode 100644 index 5b7990ab030..00000000000 --- a/docker/server/Dockerfile +++ /dev/null @@ -1,122 +0,0 @@ -FROM ubuntu:20.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -ARG repository="deb https://packages.clickhouse.com/deb stable main" -ARG version=22.1.1.* - -# set non-empty deb_location_url url to create a docker image -# from debs created by CI build, for example: -# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852 -ARG deb_location_url="" - -# set non-empty single_binary_location_url to create docker image -# from a single binary url (useful for non-standard builds - with sanitizers, for arm64). -# for example (run on aarch64 server): -# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.com/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm -# note: clickhouse-odbc-bridge is not supported there. -ARG single_binary_location_url="" - -# see https://github.com/moby/moby/issues/4032#issuecomment-192327844 -ARG DEBIAN_FRONTEND=noninteractive - -# user/group precreated explicitly with fixed uid/gid on purpose. -# It is especially important for rootless containers: in that case entrypoint -# can't do chown and owners of mounted volumes should be configured externally. -# We do that in advance at the begining of Dockerfile before any packages will be -# installed to prevent picking those uid / gid by some unrelated software. -# The same uid / gid (101) is used both for alpine and ubuntu. - -# To drop privileges, we need 'su' command, that simply changes uid and gid. -# In fact, the 'su' command from Linux is not so simple, due to inherent vulnerability in Linux: -# https://ruderich.org/simon/notes/su-sudo-from-root-tty-hijacking -# It has to mitigate this drawback of Linux, and to do this, 'su' command is creating it's own pseudo-terminal -# and forwarding commands. Due to some ridiculous curcumstances, it does not work in Docker (or it does) -# and for these reasons people are using alternatives to the 'su' command in Docker, -# that don't mess with the terminal, don't care about closing the opened files, etc... -# but can only be safe to drop privileges inside Docker. -# The question - what implementation of 'su' command to use. -# It should be a simple script doing about just two syscalls. -# Some people tend to use 'gosu' tool that is written in Go. -# It is not used for several reasons: -# 1. Dependency on some foreign code in yet another programming language - does not sound alright. -# 2. Anselmo D. Adams suggested not to use it due to false positive alarms in some undisclosed security scanners. 
- -COPY su-exec.c /su-exec.c - -RUN groupadd -r clickhouse --gid=101 \ - && useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \ - && apt-get update \ - && apt-get install --yes --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - dirmngr \ - gnupg \ - locales \ - wget \ - tzdata \ - && mkdir -p /etc/apt/sources.list.d \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \ - && echo $repository > /etc/apt/sources.list.d/clickhouse.list \ - && if [ -n "$deb_location_url" ]; then \ - echo "installing from custom url with deb packages: $deb_location_url" \ - rm -rf /tmp/clickhouse_debs \ - && mkdir -p /tmp/clickhouse_debs \ - && wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-common-static_${version}_amd64.deb" -P /tmp/clickhouse_debs \ - && wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-client_${version}_all.deb" -P /tmp/clickhouse_debs \ - && wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-server_${version}_all.deb" -P /tmp/clickhouse_debs \ - && dpkg -i /tmp/clickhouse_debs/*.deb ; \ - elif [ -n "$single_binary_location_url" ]; then \ - echo "installing from single binary url: $single_binary_location_url" \ - && rm -rf /tmp/clickhouse_binary \ - && mkdir -p /tmp/clickhouse_binary \ - && wget --progress=bar:force:noscroll "$single_binary_location_url" -O /tmp/clickhouse_binary/clickhouse \ - && chmod +x /tmp/clickhouse_binary/clickhouse \ - && /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \ - else \ - echo "installing from repository: $repository" \ - && apt-get update \ - && apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \ - && apt-get install --allow-unauthenticated --yes --no-install-recommends \ - clickhouse-common-static=$version \ - clickhouse-client=$version \ - clickhouse-server=$version ; \ - fi \ - && apt-get install -y --no-install-recommends tcc libc-dev && \ - tcc /su-exec.c -o /bin/su-exec && \ - chown root:root /bin/su-exec && \ - chmod 0755 /bin/su-exec && \ - rm /su-exec.c && \ - apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \ - && clickhouse-local -q 'SELECT * FROM system.build_options' \ - && rm -rf \ - /var/lib/apt/lists/* \ - /var/cache/debconf \ - /tmp/* \ - && apt-get clean \ - && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \ - && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client - -# we need to allow "others" access to clickhouse folder, because docker container -# can be started with arbitrary uid (openshift usecase) - -RUN locale-gen en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 -ENV TZ UTC - -RUN mkdir /docker-entrypoint-initdb.d - -COPY docker_related_config.xml /etc/clickhouse-server/config.d/ -COPY entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh - -EXPOSE 9000 8123 9009 -VOLUME /var/lib/clickhouse - -ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml - -ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile new file mode 120000 index 00000000000..fd45f0f7c7c --- /dev/null +++ b/docker/server/Dockerfile @@ -0,0 +1 @@ +Dockerfile.ubuntu \ No newline at end of file diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index cd192c0c9da..5aaf5dd5511 100644 --- 
a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -1,3 +1,14 @@ +FROM ubuntu:20.04 AS glibc-donor +ARG TARGETARCH + +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && ln -s "${rarch}-linux-gnu" /lib/linux-gnu + + FROM alpine ENV LANG=en_US.UTF-8 \ @@ -6,7 +17,24 @@ ENV LANG=en_US.UTF-8 \ TZ=UTC \ CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml -COPY alpine-root/ / +COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/ +COPY --from=glibc-donor /etc/nsswitch.conf /etc/ +COPY docker_related_config.xml /etc/clickhouse-server/config.d/ +COPY entrypoint.sh /entrypoint.sh + +ARG TARGETARCH + +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \ + arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \ + esac + +# lts / testing / prestable / etc +ARG REPO_CHANNEL="stable" +ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" +ARG VERSION="20.9.3.45" +ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. # It is especially important for rootless containers: in that case entrypoint @@ -15,9 +43,23 @@ COPY alpine-root/ / # installed to prevent picking those uid / gid by some unrelated software. # The same uid / gid (101) is used both for alpine and ubuntu. -RUN addgroup -S -g 101 clickhouse \ +RUN arch=${TARGETARCH:-amd64} \ + && for package in ${PACKAGES}; do \ + { \ + { echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \ + && wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \ + && tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \ + } || \ + { echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \ + && wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \ + && tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \ + } ; \ + } || exit 1 \ + ; done \ + && rm /tmp/*.tgz /install -r \ + && addgroup -S -g 101 clickhouse \ && adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse server" -u 101 clickhouse \ - && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \ + && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server/config.d /etc/clickhouse-server/users.d /etc/clickhouse-client /docker-entrypoint-initdb.d \ && chown clickhouse:clickhouse /var/lib/clickhouse \ && chown root:clickhouse /var/log/clickhouse-server \ && chmod +x /entrypoint.sh \ diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu new file mode 100644 index 00000000000..6e93bd97036 --- /dev/null +++ b/docker/server/Dockerfile.ubuntu @@ -0,0 +1,129 @@ +FROM ubuntu:20.04 + +# see https://github.com/moby/moby/issues/4032#issuecomment-192327844 +ARG DEBIAN_FRONTEND=noninteractive + +COPY su-exec.c /su-exec.c + +# ARG for quick switch to a given ubuntu mirror +ARG apt_archive="http://archive.ubuntu.com" +RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list \ + && groupadd -r clickhouse --gid=101 \ + && useradd -r -g clickhouse 
--uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \ + && apt-get update \ + && apt-get install --yes --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + dirmngr \ + gnupg \ + locales \ + wget \ + tzdata \ + && apt-get install -y --no-install-recommends tcc libc-dev && \ + tcc /su-exec.c -o /bin/su-exec && \ + chown root:root /bin/su-exec && \ + chmod 0755 /bin/su-exec && \ + rm /su-exec.c && \ + apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \ + && apt-get clean + +ARG REPO_CHANNEL="stable" +ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" +ARG VERSION=22.1.1.* +ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" + +# set non-empty deb_location_url url to create a docker image +# from debs created by CI build, for example: +# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852 +ARG deb_location_url="" + +# set non-empty single_binary_location_url to create docker image +# from a single binary url (useful for non-standard builds - with sanitizers, for arm64). +# for example (run on aarch64 server): +# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.com/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm +# note: clickhouse-odbc-bridge is not supported there. +ARG single_binary_location_url="" + +# user/group precreated explicitly with fixed uid/gid on purpose. +# It is especially important for rootless containers: in that case entrypoint +# can't do chown and owners of mounted volumes should be configured externally. +# We do that in advance at the begining of Dockerfile before any packages will be +# installed to prevent picking those uid / gid by some unrelated software. +# The same uid / gid (101) is used both for alpine and ubuntu. + +# To drop privileges, we need 'su' command, that simply changes uid and gid. +# In fact, the 'su' command from Linux is not so simple, due to inherent vulnerability in Linux: +# https://ruderich.org/simon/notes/su-sudo-from-root-tty-hijacking +# It has to mitigate this drawback of Linux, and to do this, 'su' command is creating it's own pseudo-terminal +# and forwarding commands. Due to some ridiculous curcumstances, it does not work in Docker (or it does) +# and for these reasons people are using alternatives to the 'su' command in Docker, +# that don't mess with the terminal, don't care about closing the opened files, etc... +# but can only be safe to drop privileges inside Docker. +# The question - what implementation of 'su' command to use. +# It should be a simple script doing about just two syscalls. +# Some people tend to use 'gosu' tool that is written in Go. +# It is not used for several reasons: +# 1. Dependency on some foreign code in yet another programming language - does not sound alright. +# 2. Anselmo D. Adams suggested not to use it due to false positive alarms in some undisclosed security scanners. 
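(The privilege-drop story in the new Ubuntu Dockerfile is therefore: compile the tiny su-exec.c with tcc at image build time, then drop to the fixed-uid account at run time instead of relying on gosu. A condensed sketch of the two halves; the build-time commands follow the RUN step above, while the run-time command is illustrative — the actual entrypoint and server flags live in entrypoint.sh, which is not changed in this hunk:)

```bash
# Image build time: compile the su-exec helper with tcc, then remove the
# compiler again so it does not stay in the final layer.
apt-get install -y --no-install-recommends tcc libc-dev
tcc /su-exec.c -o /bin/su-exec
chown root:root /bin/su-exec
chmod 0755 /bin/su-exec
apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev

# Container run time (entrypoint running as root): drop privileges to the
# pre-created fixed uid/gid account before starting the server.
exec /bin/su-exec clickhouse:clickhouse /usr/bin/clickhouse-server \
    --config-file=/etc/clickhouse-server/config.xml
```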
+ +ARG TARGETARCH + +RUN arch=${TARGETARCH:-amd64} \ + && if [ -n "${deb_location_url}" ]; then \ + echo "installing from custom url with deb packages: ${deb_location_url}" \ + rm -rf /tmp/clickhouse_debs \ + && mkdir -p /tmp/clickhouse_debs \ + && for package in ${PACKAGES}; do \ + { wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_${arch}.deb" -P /tmp/clickhouse_debs || \ + wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_all.deb" -P /tmp/clickhouse_debs ; } \ + || exit 1 \ + ; done \ + && dpkg -i /tmp/clickhouse_debs/*.deb ; \ + elif [ -n "${single_binary_location_url}" ]; then \ + echo "installing from single binary url: ${single_binary_location_url}" \ + && rm -rf /tmp/clickhouse_binary \ + && mkdir -p /tmp/clickhouse_binary \ + && wget --progress=bar:force:noscroll "${single_binary_location_url}" -O /tmp/clickhouse_binary/clickhouse \ + && chmod +x /tmp/clickhouse_binary/clickhouse \ + && /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \ + else \ + mkdir -p /etc/apt/sources.list.d \ + && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \ + && echo ${REPOSITORY} > /etc/apt/sources.list.d/clickhouse.list \ + && echo "installing from repository: ${REPOSITORY}" \ + && apt-get update \ + && apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \ + && for package in ${PACKAGES}; do \ + packages="${packages} ${package}=${VERSION}" \ + ; done \ + && apt-get install --allow-unauthenticated --yes --no-install-recommends ${packages} || exit 1 \ + ; fi \ + && clickhouse-local -q 'SELECT * FROM system.build_options' \ + && rm -rf \ + /var/lib/apt/lists/* \ + /var/cache/debconf \ + /tmp/* \ + && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \ + && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client + +# we need to allow "others" access to clickhouse folder, because docker container +# can be started with arbitrary uid (openshift usecase) + +RUN locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 +ENV TZ UTC + +RUN mkdir /docker-entrypoint-initdb.d + +COPY docker_related_config.xml /etc/clickhouse-server/config.d/ +COPY entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +EXPOSE 9000 8123 9009 +VOLUME /var/lib/clickhouse + +ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/server/alpine-build.sh b/docker/server/alpine-build.sh deleted file mode 100755 index 1b448c61fbb..00000000000 --- a/docker/server/alpine-build.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash -set -x - -REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc -REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}" -VERSION="${VERSION:-20.9.3.45}" -DOCKER_IMAGE="${DOCKER_IMAGE:-clickhouse/clickhouse-server}" - -# where original files live -DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}" - -# we will create root for our image here -CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root" - -# clean up the root from old runs, it's reconstructed each time -rm -rf "$CONTAINER_ROOT_FOLDER" -mkdir -p "$CONTAINER_ROOT_FOLDER" - -# where to put downloaded tgz -TGZ_PACKAGES_FOLDER="${DOCKER_BUILD_FOLDER}/tgz-packages" -mkdir -p "$TGZ_PACKAGES_FOLDER" - -PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" ) - 
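The install step above (like the tgz fetch in Dockerfile.alpine earlier in this diff) follows one pattern: try the arch-suffixed artifact first and fall back to the generic name, failing the build only when both are missing; the deleted alpine-build.sh, which continues below, only ever used the generic tgz name. A hypothetical Python sketch of that fallback, reusing the REPOSITORY/VERSION defaults from the Dockerfile.alpine ARGs purely as example values:

import urllib.error
import urllib.request

# Example values taken from the Dockerfile.alpine ARGs above.
REPOSITORY = "https://packages.clickhouse.com/tgz/stable"
VERSION = "20.9.3.45"


def fetch_package(package, arch, dest_dir="/tmp"):
    # Arch-specific tarball first, then the legacy name without a suffix,
    # mirroring the "wget ... || wget ..." chain in the RUN step.
    candidates = [
        f"{REPOSITORY}/{package}-{VERSION}-{arch}.tgz",
        f"{REPOSITORY}/{package}-{VERSION}.tgz",
    ]
    last_error = None
    for url in candidates:
        target = f"{dest_dir}/{url.rsplit('/', 1)[-1]}"
        try:
            urllib.request.urlretrieve(url, target)
            return target
        except urllib.error.URLError as error:
            last_error = error
    raise RuntimeError(f"no tarball found for {package}-{VERSION}") from last_error


# Usage, matching ARG PACKAGES:
# for package in ("clickhouse-common-static", "clickhouse-server", "clickhouse-client"):
#     fetch_package(package, "amd64")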
-# download tars from the repo -for package in "${PACKAGES[@]}" -do - wget -c -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" -done - -# unpack tars -for package in "${PACKAGES[@]}" -do - tar xvzf "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" --strip-components=2 -C "$CONTAINER_ROOT_FOLDER" -done - -# prepare few more folders -mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \ - "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d" \ - "${CONTAINER_ROOT_FOLDER}/var/log/clickhouse-server" \ - "${CONTAINER_ROOT_FOLDER}/var/lib/clickhouse" \ - "${CONTAINER_ROOT_FOLDER}/docker-entrypoint-initdb.d" \ - "${CONTAINER_ROOT_FOLDER}/lib64" - -cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/" -cp "${DOCKER_BUILD_FOLDER}/entrypoint.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh" - -## get glibc components from ubuntu 20.04 and put them to expected place -docker pull ubuntu:20.04 -ubuntu20image=$(docker create --rm ubuntu:20.04) -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libc.so.6 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libdl.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libm.so.6 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_files.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64" -docker cp -L "${ubuntu20image}":/etc/nsswitch.conf "${CONTAINER_ROOT_FOLDER}/etc" - -docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull -rm -rf "$CONTAINER_ROOT_FOLDER" diff --git a/docker/server/local.Dockerfile b/docker/server/local.Dockerfile deleted file mode 100644 index 0d86c9ce45a..00000000000 --- a/docker/server/local.Dockerfile +++ /dev/null @@ -1,47 +0,0 @@ -# Since right now we can't set volumes to the docker during build, we split building container in stages: -# 1. build base container -# 2. run base conatiner with mounted volumes -# 3. commit container as image -# 4. build final container atop that image -# Middle steps are performed by the bash script. - -FROM ubuntu:18.04 as clickhouse-server-base -ARG gosu_ver=1.14 - -VOLUME /packages/ - -# update to allow installing dependencies of clickhouse automatically -RUN apt update; \ - DEBIAN_FRONTEND=noninteractive \ - apt install -y locales; - -ADD https://github.com/tianon/gosu/releases/download/${gosu_ver}/gosu-amd64 /bin/gosu - -RUN locale-gen en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 - -# installing via apt to simulate real-world scenario, where user installs deb package and all it's dependecies automatically. 
-CMD DEBIAN_FRONTEND=noninteractive \ - apt install -y \ - /packages/clickhouse-common-static_*.deb \ - /packages/clickhouse-server_*.deb ; - -FROM clickhouse-server-base:postinstall as clickhouse-server - -RUN mkdir /docker-entrypoint-initdb.d - -COPY docker_related_config.xml /etc/clickhouse-server/config.d/ -COPY entrypoint.sh /entrypoint.sh - -RUN chmod +x \ - /entrypoint.sh \ - /bin/gosu - -EXPOSE 9000 8123 9009 -VOLUME /var/lib/clickhouse - -ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml - -ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index bd1e0292636..31ec52b1d5a 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -115,6 +115,7 @@ function start_server function clone_root { + git config --global --add safe.directory "$FASTTEST_SOURCE" git clone --depth 1 https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt" ( @@ -267,6 +268,7 @@ function run_tests local test_opts=( --hung-check --fast-tests-only + --no-random-settings --no-long --testname --shard diff --git a/docker/test/fuzzer/generate-test-j2.py b/docker/test/fuzzer/generate-test-j2.py index bcc1bf6bc84..11525163ed8 100755 --- a/docker/test/fuzzer/generate-test-j2.py +++ b/docker/test/fuzzer/generate-test-j2.py @@ -11,7 +11,7 @@ def removesuffix(text, suffix): https://www.python.org/dev/peps/pep-0616/ """ if suffix and text.endswith(suffix): - return text[:-len(suffix)] + return text[: -len(suffix)] else: return text[:] diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index e18c07bf2c1..32799a669eb 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -13,7 +13,7 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" echo "$script_dir" repo_dir=ch BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-13_debug_none_bundled_unsplitted_disable_False_binary"} -BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} +BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} function clone { @@ -226,7 +226,6 @@ quit --receive_data_timeout_ms=10000 \ --stacktrace \ --query-fuzzer-runs=1000 \ - --testmode \ --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \ $NEW_TESTS_OPT \ > >(tail -n 100000 > fuzzer.log) \ diff --git a/docker/test/integration/hive_server/http_api_server.py b/docker/test/integration/hive_server/http_api_server.py index 4818b785c89..8a9d3da4846 100644 --- a/docker/test/integration/hive_server/http_api_server.py +++ b/docker/test/integration/hive_server/http_api_server.py @@ -3,55 +3,55 @@ import subprocess import datetime from flask import Flask, flash, request, redirect, url_for + def run_command(command, wait=False): print("{} - execute shell command:{}".format(datetime.datetime.now(), command)) lines = [] - p = subprocess.Popen(command, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - shell=True) + p = subprocess.Popen( + command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True + ) if wait: - for l in iter(p.stdout.readline, b''): + for l in iter(p.stdout.readline, b""): lines.append(l) p.poll() return (lines, p.returncode) else: - return(iter(p.stdout.readline, b''), 0) + return 
(iter(p.stdout.readline, b""), 0) -UPLOAD_FOLDER = './' -ALLOWED_EXTENSIONS = {'txt', 'sh'} +UPLOAD_FOLDER = "./" +ALLOWED_EXTENSIONS = {"txt", "sh"} app = Flask(__name__) -app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER +app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER -@app.route('/') + +@app.route("/") def hello_world(): - return 'Hello World' + return "Hello World" def allowed_file(filename): - return '.' in filename and \ - filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS + return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS -@app.route('/upload', methods=['GET', 'POST']) +@app.route("/upload", methods=["GET", "POST"]) def upload_file(): - if request.method == 'POST': + if request.method == "POST": # check if the post request has the file part - if 'file' not in request.files: - flash('No file part') + if "file" not in request.files: + flash("No file part") return redirect(request.url) - file = request.files['file'] + file = request.files["file"] # If the user does not select a file, the browser submits an # empty file without a filename. - if file.filename == '': - flash('No selected file') + if file.filename == "": + flash("No selected file") return redirect(request.url) if file and allowed_file(file.filename): filename = file.filename - file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename)) - return redirect(url_for('upload_file', name=filename)) - return ''' + file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename)) + return redirect(url_for("upload_file", name=filename)) + return """ Upload new File

Upload new File

@@ -59,12 +59,15 @@ def upload_file(): - ''' -@app.route('/run', methods=['GET', 'POST']) + """ + + +@app.route("/run", methods=["GET", "POST"]) def parse_request(): data = request.data # data is empty run_command(data, wait=True) - return 'Ok' + return "Ok" -if __name__ == '__main__': - app.run(port=5011) + +if __name__ == "__main__": + app.run(port=5011) diff --git a/docker/test/integration/mysql_js_client/Dockerfile b/docker/test/integration/mysql_js_client/Dockerfile index b1397b40d38..4c9df10ace1 100644 --- a/docker/test/integration/mysql_js_client/Dockerfile +++ b/docker/test/integration/mysql_js_client/Dockerfile @@ -1,8 +1,10 @@ # docker build -t clickhouse/mysql-js-client . # MySQL JavaScript client docker container -FROM node:8 +FROM node:16.14.2 + +WORKDIR /usr/app RUN npm install mysql -COPY ./test.js test.js +COPY ./test.js ./test.js diff --git a/docker/test/keeper-jepsen/run.sh b/docker/test/keeper-jepsen/run.sh index d7534270e2c..4dec82234bc 100644 --- a/docker/test/keeper-jepsen/run.sh +++ b/docker/test/keeper-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"} +CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} @@ -10,7 +10,7 @@ if [ -z "$CLICKHOUSE_REPO_PATH" ]; then CLICKHOUSE_REPO_PATH=ch rm -rf ch ||: mkdir ch ||: - wget -nv -nd -c "https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz" + wget -nv -nd -c "https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz" tar -C ch --strip-components=1 -xf clickhouse_no_subs.tar.gz ls -lath ||: fi diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 16ac304d7fb..6297bbead70 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -362,19 +362,6 @@ function get_profiles clickhouse-client --port $RIGHT_SERVER_PORT --query "select 1" } -function build_log_column_definitions -{ -# FIXME This loop builds column definitons from TSVWithNamesAndTypes in an -# absolutely atrocious way. This should be done by the file() function itself. -for x in {right,left}-{addresses,{query,query-thread,trace,{async-,}metric}-log}.tsv -do - paste -d' ' \ - <(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \ - <(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \ - | tr '\n' ', ' | sed 's/,$//' > "$x.columns" -done -} - # Build and analyze randomization distribution for all queries. function analyze_queries { @@ -382,8 +369,6 @@ rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.t rm -rf analyze ||: mkdir analyze analyze/tmp ||: -build_log_column_definitions - # Split the raw test output into files suitable for analysis. # To debug calculations only for a particular test, substitute a suitable # wildcard here, e.g. `for test_file in modulo-raw.tsv`. @@ -422,12 +407,10 @@ create table partial_query_times engine File(TSVWithNamesAndTypes, -- Process queries that were run normally, on both servers. 
create view left_query_log as select * - from file('left-query-log.tsv', TSVWithNamesAndTypes, - '$(cat "left-query-log.tsv.columns")'); + from file('left-query-log.tsv', TSVWithNamesAndTypes); create view right_query_log as select * - from file('right-query-log.tsv', TSVWithNamesAndTypes, - '$(cat "right-query-log.tsv.columns")'); + from file('right-query-log.tsv', TSVWithNamesAndTypes); create view query_logs as select 0 version, query_id, ProfileEvents, @@ -645,8 +628,6 @@ mkdir report report/tmp ||: rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv run-errors.tsv ||: -build_log_column_definitions - cat analyze/errors.log >> report/errors.log ||: cat profile-errors.log >> report/errors.log ||: @@ -1028,8 +1009,7 @@ create table unstable_query_runs engine File(TSVWithNamesAndTypes, ; create view query_log as select * - from file('$version-query-log.tsv', TSVWithNamesAndTypes, - '$(cat "$version-query-log.tsv.columns")'); + from file('$version-query-log.tsv', TSVWithNamesAndTypes); create table unstable_run_metrics engine File(TSVWithNamesAndTypes, 'unstable-run-metrics.$version.rep') as @@ -1057,8 +1037,7 @@ create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes, array join v, n; create view trace_log as select * - from file('$version-trace-log.tsv', TSVWithNamesAndTypes, - '$(cat "$version-trace-log.tsv.columns")'); + from file('$version-trace-log.tsv', TSVWithNamesAndTypes); create view addresses_src as select addr, -- Some functions change name between builds, e.g. '__clone' or 'clone' or @@ -1067,8 +1046,7 @@ create view addresses_src as select addr, [name, 'clone.S (filtered by script)', 'pthread_cond_timedwait (filtered by script)'] -- this line is a subscript operator of the above array [1 + multiSearchFirstIndex(name, ['clone.S', 'pthread_cond_timedwait'])] name - from file('$version-addresses.tsv', TSVWithNamesAndTypes, - '$(cat "$version-addresses.tsv.columns")'); + from file('$version-addresses.tsv', TSVWithNamesAndTypes); create table addresses_join_$version engine Join(any, left, address) as select addr address, name from addresses_src; @@ -1195,15 +1173,12 @@ done function report_metrics { -build_log_column_definitions - rm -rf metrics ||: mkdir metrics clickhouse-local --query " create view right_async_metric_log as - select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes, - '$(cat right-async-metric-log.tsv.columns)') + select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes) ; -- Use the right log as time reference because it may have higher precision. 
@@ -1211,8 +1186,7 @@ create table metrics engine File(TSV, 'metrics/metrics.tsv') as with (select min(event_time) from right_async_metric_log) as min_time select metric, r.event_time - min_time event_time, l.value as left, r.value as right from right_async_metric_log r - asof join file('left-async-metric-log.tsv', TSVWithNamesAndTypes, - '$(cat left-async-metric-log.tsv.columns)') l + asof join file('left-async-metric-log.tsv', TSVWithNamesAndTypes) l on l.metric = r.metric and r.event_time <= l.event_time order by metric, event_time ; @@ -1294,15 +1268,15 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv') select '' test_name, '$(sed -n 's/.*/\1/p' report.html)' test_status, 0 test_duration_ms, - 'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#fail1' report_url + 'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#fail1' report_url union all select test || ' #' || toString(query_index), 'slower' test_status, 0 test_duration_ms, - 'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#changes-in-performance.' + 'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#changes-in-performance.' || test || '.' || toString(query_index) report_url from queries where changed_fail != 0 and diff > 0 union all select test || ' #' || toString(query_index), 'unstable' test_status, 0 test_duration_ms, - 'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#unstable-queries.' + 'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#unstable-queries.' || test || '.' || toString(query_index) report_url from queries where unstable_fail != 0 ) @@ -1378,7 +1352,7 @@ $REF_SHA $SHA_TO_TEST $(numactl --hardware | sed -n 's/^available:[[:space:]]\+/ EOF # Also insert some data about the check into the CI checks table. 
- "${client[@]}" --query "INSERT INTO "'"'"gh-data"'"'".checks FORMAT TSVWithNamesAndTypes" \ + "${client[@]}" --query "INSERT INTO "'"'"default"'"'".checks FORMAT TSVWithNamesAndTypes" \ < ci-checks.tsv set -x diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh index 8fa6eb5ec83..ae9e677713f 100755 --- a/docker/test/performance-comparison/download.sh +++ b/docker/test/performance-comparison/download.sh @@ -16,26 +16,17 @@ right_sha=$4 datasets=${CHPC_DATASETS-"hits1 hits10 hits100 values"} declare -A dataset_paths -if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then - dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar" - dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar" - dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar" - dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar" -else - dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar" - dataset_paths["hits100"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar" - dataset_paths["hits1"]="https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar" - dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_expressions/partitions/test_values.tar" -fi +dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar" +dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar" +dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar" +dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar" function download { # Historically there were various paths for the performance test package. # Test all of them. - declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz" - "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz" - ) + declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz") for path in "${urls_to_try[@]}" do diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 3d37a6c0e92..767807d008b 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -4,7 +4,7 @@ set -ex CHPC_CHECK_START_TIMESTAMP="$(date +%s)" export CHPC_CHECK_START_TIMESTAMP -S3_URL=${S3_URL:="https://clickhouse-builds.s3.yandex.net"} +S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"} COMMON_BUILD_PREFIX="/clickhouse_build_check" if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then @@ -64,9 +64,7 @@ function find_reference_sha # Historically there were various path for the performance test package, # test all of them. 
unset found - declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz" - "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/clickhouse_build_check/performance/performance.tgz" - ) + declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz") for path in "${urls_to_try[@]}" do if curl_with_retry "$path" diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 61987d34299..2266641397b 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -19,58 +19,126 @@ import xml.etree.ElementTree as et from threading import Thread from scipy import stats -logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING') +logging.basicConfig( + format="%(asctime)s: %(levelname)s: %(module)s: %(message)s", level="WARNING" +) total_start_seconds = time.perf_counter() stage_start_seconds = total_start_seconds + def reportStageEnd(stage): global stage_start_seconds, total_start_seconds current = time.perf_counter() - print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}') + print( + f"stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}" + ) stage_start_seconds = current def tsv_escape(s): - return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','') + return ( + s.replace("\\", "\\\\") + .replace("\t", "\\t") + .replace("\n", "\\n") + .replace("\r", "") + ) -parser = argparse.ArgumentParser(description='Run performance test.') +parser = argparse.ArgumentParser(description="Run performance test.") # Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set. -parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file') -parser.add_argument('--host', nargs='*', default=['localhost'], help="Space-separated list of server hostname(s). Corresponds to '--port' options.") -parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated list of server port(s). Corresponds to '--host' options.") -parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.') -parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.') -parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.') -parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.') -parser.add_argument('--prewarm-max-query-seconds', type=int, default=180, help='For how many seconds at most a prewarm (cold storage) query is allowed to run. 
The script finishes with error if this time is exceeded.') -parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.') -parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.') -parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.') -parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.') -parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.") -parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.") +parser.add_argument( + "file", + metavar="FILE", + type=argparse.FileType("r", encoding="utf-8"), + nargs=1, + help="test description file", +) +parser.add_argument( + "--host", + nargs="*", + default=["localhost"], + help="Space-separated list of server hostname(s). Corresponds to '--port' options.", +) +parser.add_argument( + "--port", + nargs="*", + default=[9000], + help="Space-separated list of server port(s). Corresponds to '--host' options.", +) +parser.add_argument( + "--runs", type=int, default=1, help="Number of query runs per server." +) +parser.add_argument( + "--max-queries", + type=int, + default=None, + help="Test no more than this number of queries, chosen at random.", +) +parser.add_argument( + "--queries-to-run", + nargs="*", + type=int, + default=None, + help="Space-separated list of indexes of queries to test.", +) +parser.add_argument( + "--max-query-seconds", + type=int, + default=15, + help="For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.", +) +parser.add_argument( + "--prewarm-max-query-seconds", + type=int, + default=180, + help="For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.", +) +parser.add_argument( + "--profile-seconds", + type=int, + default=0, + help="For how many seconds to profile a query for which the performance has changed.", +) +parser.add_argument( + "--long", action="store_true", help="Do not skip the tests tagged as long." +) +parser.add_argument( + "--print-queries", action="store_true", help="Print test queries and exit." +) +parser.add_argument( + "--print-settings", action="store_true", help="Print test settings and exit." +) +parser.add_argument( + "--keep-created-tables", + action="store_true", + help="Don't drop the created tables after the test.", +) +parser.add_argument( + "--use-existing-tables", + action="store_true", + help="Don't create or drop the tables, use the existing ones instead.", +) args = parser.parse_args() -reportStageEnd('start') +reportStageEnd("start") test_name = os.path.splitext(os.path.basename(args.file[0].name))[0] tree = et.parse(args.file[0]) root = tree.getroot() -reportStageEnd('parse') +reportStageEnd("parse") # Process query parameters -subst_elems = root.findall('substitutions/substitution') -available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... } +subst_elems = root.findall("substitutions/substitution") +available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... 
} for e in subst_elems: - name = e.find('name').text - values = [v.text for v in e.findall('values/value')] + name = e.find("name").text + values = [v.text for v in e.findall("values/value")] if not values: - raise Exception(f'No values given for substitution {{{name}}}') + raise Exception(f"No values given for substitution {{{name}}}") available_parameters[name] = values @@ -78,7 +146,7 @@ for e in subst_elems: # parameters. The set of parameters is determined based on the first list. # Note: keep the order of queries -- sometimes we have DROP IF EXISTS # followed by CREATE in create queries section, so the order matters. -def substitute_parameters(query_templates, other_templates = []): +def substitute_parameters(query_templates, other_templates=[]): query_results = [] other_results = [[]] * (len(other_templates)) for i, q in enumerate(query_templates): @@ -103,17 +171,21 @@ def substitute_parameters(query_templates, other_templates = []): # and reporting the queries marked as short. test_queries = [] is_short = [] -for e in root.findall('query'): - new_queries, [new_is_short] = substitute_parameters([e.text], [[e.attrib.get('short', '0')]]) +for e in root.findall("query"): + new_queries, [new_is_short] = substitute_parameters( + [e.text], [[e.attrib.get("short", "0")]] + ) test_queries += new_queries is_short += [eval(s) for s in new_is_short] -assert(len(test_queries) == len(is_short)) +assert len(test_queries) == len(is_short) # If we're given a list of queries to run, check that it makes sense. for i in args.queries_to_run or []: if i < 0 or i >= len(test_queries): - print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present') + print( + f"There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present" + ) exit(1) # If we're only asked to print the queries, do that and exit. @@ -125,60 +197,65 @@ if args.print_queries: # Print short queries for i, s in enumerate(is_short): if s: - print(f'short\t{i}') + print(f"short\t{i}") # If we're only asked to print the settings, do that and exit. These are settings # for clickhouse-benchmark, so we print them as command line arguments, e.g. # '--max_memory_usage=10000000'. if args.print_settings: - for s in root.findall('settings/*'): - print(f'--{s.tag}={s.text}') + for s in root.findall("settings/*"): + print(f"--{s.tag}={s.text}") exit(0) # Skip long tests if not args.long: - for tag in root.findall('.//tag'): - if tag.text == 'long': - print('skipped\tTest is tagged as long.') + for tag in root.findall(".//tag"): + if tag.text == "long": + print("skipped\tTest is tagged as long.") sys.exit(0) # Print report threshold for the test if it is set. ignored_relative_change = 0.05 -if 'max_ignored_relative_change' in root.attrib: +if "max_ignored_relative_change" in root.attrib: ignored_relative_change = float(root.attrib["max_ignored_relative_change"]) - print(f'report-threshold\t{ignored_relative_change}') + print(f"report-threshold\t{ignored_relative_change}") -reportStageEnd('before-connect') +reportStageEnd("before-connect") # Open connections -servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)] +servers = [ + {"host": host or args.host[0], "port": port or args.port[0]} + for (host, port) in itertools.zip_longest(args.host, args.port) +] # Force settings_is_important to fail queries on unknown settings. 
-all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers] +all_connections = [ + clickhouse_driver.Client(**server, settings_is_important=True) for server in servers +] for i, s in enumerate(servers): print(f'server\t{i}\t{s["host"]}\t{s["port"]}') -reportStageEnd('connect') +reportStageEnd("connect") if not args.use_existing_tables: # Run drop queries, ignoring errors. Do this before all other activity, # because clickhouse_driver disconnects on error (this is not configurable), # and the new connection loses the changes in settings. - drop_query_templates = [q.text for q in root.findall('drop_query')] + drop_query_templates = [q.text for q in root.findall("drop_query")] drop_queries = substitute_parameters(drop_query_templates) for conn_index, c in enumerate(all_connections): for q in drop_queries: try: c.execute(q) - print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') + print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}") except: pass - reportStageEnd('drop-1') + reportStageEnd("drop-1") # Apply settings. -settings = root.findall('settings/*') +settings = root.findall("settings/*") for conn_index, c in enumerate(all_connections): for s in settings: # requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings @@ -189,48 +266,52 @@ for conn_index, c in enumerate(all_connections): # the test, which is wrong. c.execute("select 1") -reportStageEnd('settings') +reportStageEnd("settings") # Check tables that should exist. If they don't exist, just skip this test. -tables = [e.text for e in root.findall('preconditions/table_exists')] +tables = [e.text for e in root.findall("preconditions/table_exists")] for t in tables: for c in all_connections: try: res = c.execute("select 1 from {} limit 1".format(t)) except: exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1] - skipped_message = ' '.join(exception_message.split('\n')[:2]) - print(f'skipped\t{tsv_escape(skipped_message)}') + skipped_message = " ".join(exception_message.split("\n")[:2]) + print(f"skipped\t{tsv_escape(skipped_message)}") sys.exit(0) -reportStageEnd('preconditions') +reportStageEnd("preconditions") if not args.use_existing_tables: # Run create and fill queries. We will run them simultaneously for both # servers, to save time. The weird XML search + filter is because we want to # keep the relative order of elements, and etree doesn't support the # appropriate xpath query. - create_query_templates = [q.text for q in root.findall('./*') - if q.tag in ('create_query', 'fill_query')] + create_query_templates = [ + q.text for q in root.findall("./*") if q.tag in ("create_query", "fill_query") + ] create_queries = substitute_parameters(create_query_templates) # Disallow temporary tables, because the clickhouse_driver reconnects on # errors, and temporary tables are destroyed. We want to be able to continue # after some errors. 
for q in create_queries: - if re.search('create temporary table', q, flags=re.IGNORECASE): - print(f"Temporary tables are not allowed in performance tests: '{q}'", - file = sys.stderr) + if re.search("create temporary table", q, flags=re.IGNORECASE): + print( + f"Temporary tables are not allowed in performance tests: '{q}'", + file=sys.stderr, + ) sys.exit(1) def do_create(connection, index, queries): for q in queries: connection.execute(q) - print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}') + print(f"create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}") threads = [ - Thread(target = do_create, args = (connection, index, create_queries)) - for index, connection in enumerate(all_connections)] + Thread(target=do_create, args=(connection, index, create_queries)) + for index, connection in enumerate(all_connections) + ] for t in threads: t.start() @@ -238,14 +319,16 @@ if not args.use_existing_tables: for t in threads: t.join() - reportStageEnd('create') + reportStageEnd("create") # By default, test all queries. queries_to_run = range(0, len(test_queries)) if args.max_queries: # If specified, test a limited number of queries chosen at random. - queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries)) + queries_to_run = random.sample( + range(0, len(test_queries)), min(len(test_queries), args.max_queries) + ) if args.queries_to_run: # Run the specified queries. @@ -255,16 +338,16 @@ if args.queries_to_run: profile_total_seconds = 0 for query_index in queries_to_run: q = test_queries[query_index] - query_prefix = f'{test_name}.query{query_index}' + query_prefix = f"{test_name}.query{query_index}" # We have some crazy long queries (about 100kB), so trim them to a sane # length. This means we can't use query text as an identifier and have to # use the test name + the test-wide query index. query_display_name = q if len(query_display_name) > 1000: - query_display_name = f'{query_display_name[:1000]}...({query_index})' + query_display_name = f"{query_display_name[:1000]}...({query_index})" - print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}') + print(f"display-name\t{query_index}\t{tsv_escape(query_display_name)}") # Prewarm: run once on both servers. Helps to bring the data into memory, # precompile the queries, etc. @@ -272,10 +355,10 @@ for query_index in queries_to_run: # new one. We want to run them on the new server only, so that the PR author # can ensure that the test works properly. Remember the errors we had on # each server. - query_error_on_connection = [None] * len(all_connections); + query_error_on_connection = [None] * len(all_connections) for conn_index, c in enumerate(all_connections): try: - prewarm_id = f'{query_prefix}.prewarm0' + prewarm_id = f"{query_prefix}.prewarm0" try: # During the warmup runs, we will also: @@ -283,25 +366,30 @@ for query_index in queries_to_run: # * collect profiler traces, which might be helpful for analyzing # test coverage. We disable profiler for normal runs because # it makes the results unstable. 
- res = c.execute(q, query_id = prewarm_id, - settings = { - 'max_execution_time': args.prewarm_max_query_seconds, - 'query_profiler_real_time_period_ns': 10000000, - 'memory_profiler_step': '4Mi', - }) + res = c.execute( + q, + query_id=prewarm_id, + settings={ + "max_execution_time": args.prewarm_max_query_seconds, + "query_profiler_real_time_period_ns": 10000000, + "memory_profiler_step": "4Mi", + }, + ) except clickhouse_driver.errors.Error as e: # Add query id to the exception to make debugging easier. e.args = (prewarm_id, *e.args) - e.message = prewarm_id + ': ' + e.message + e.message = prewarm_id + ": " + e.message raise - print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}') + print( + f"prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}" + ) except KeyboardInterrupt: raise except: # FIXME the driver reconnects on error and we lose settings, so this # might lead to further errors or unexpected behavior. - query_error_on_connection[conn_index] = traceback.format_exc(); + query_error_on_connection[conn_index] = traceback.format_exc() continue # Report all errors that ocurred during prewarm and decide what to do next. @@ -311,14 +399,14 @@ for query_index in queries_to_run: no_errors = [] for i, e in enumerate(query_error_on_connection): if e: - print(e, file = sys.stderr) + print(e, file=sys.stderr) else: no_errors.append(i) if len(no_errors) == 0: continue elif len(no_errors) < len(all_connections): - print(f'partial\t{query_index}\t{no_errors}') + print(f"partial\t{query_index}\t{no_errors}") this_query_connections = [all_connections[index] for index in no_errors] @@ -337,27 +425,34 @@ for query_index in queries_to_run: all_server_times.append([]) while True: - run_id = f'{query_prefix}.run{run}' + run_id = f"{query_prefix}.run{run}" for conn_index, c in enumerate(this_query_connections): try: - res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds}) + res = c.execute( + q, + query_id=run_id, + settings={"max_execution_time": args.max_query_seconds}, + ) except clickhouse_driver.errors.Error as e: # Add query id to the exception to make debugging easier. e.args = (run_id, *e.args) - e.message = run_id + ': ' + e.message + e.message = run_id + ": " + e.message raise elapsed = c.last_query.elapsed all_server_times[conn_index].append(elapsed) server_seconds += elapsed - print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}') + print(f"query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}") if elapsed > args.max_query_seconds: # Do not stop processing pathologically slow queries, # since this may hide errors in other queries. - print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr) + print( + f"The query no. {query_index} is taking too long to run ({elapsed} s)", + file=sys.stderr, + ) # Be careful with the counter, after this line it's the next iteration # already. @@ -386,7 +481,7 @@ for query_index in queries_to_run: break client_seconds = time.perf_counter() - start_seconds - print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}') + print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}") # Run additional profiling queries to collect profile data, but only if test times appeared to be different. 
# We have to do it after normal runs because otherwise it will affect test statistics too much @@ -397,13 +492,15 @@ for query_index in queries_to_run: # Don't fail if for some reason there are not enough measurements. continue - pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue + pvalue = stats.ttest_ind( + all_server_times[0], all_server_times[1], equal_var=False + ).pvalue median = [statistics.median(t) for t in all_server_times] # Keep this consistent with the value used in report. Should eventually move # to (median[1] - median[0]) / min(median), which is compatible with "times" # difference we use in report (max(median) / min(median)). relative_diff = (median[1] - median[0]) / median[0] - print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}') + print(f"diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}") if abs(relative_diff) < ignored_relative_change or pvalue > 0.05: continue @@ -412,25 +509,31 @@ for query_index in queries_to_run: profile_start_seconds = time.perf_counter() run = 0 while time.perf_counter() - profile_start_seconds < args.profile_seconds: - run_id = f'{query_prefix}.profile{run}' + run_id = f"{query_prefix}.profile{run}" for conn_index, c in enumerate(this_query_connections): try: - res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000}) - print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}') + res = c.execute( + q, + query_id=run_id, + settings={"query_profiler_real_time_period_ns": 10000000}, + ) + print( + f"profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}" + ) except clickhouse_driver.errors.Error as e: # Add query id to the exception to make debugging easier. 
e.args = (run_id, *e.args) - e.message = run_id + ': ' + e.message + e.message = run_id + ": " + e.message raise run += 1 profile_total_seconds += time.perf_counter() - profile_start_seconds -print(f'profile-total\t{profile_total_seconds}') +print(f"profile-total\t{profile_total_seconds}") -reportStageEnd('run') +reportStageEnd("run") # Run drop queries if not args.keep_created_tables and not args.use_existing_tables: @@ -438,6 +541,6 @@ if not args.keep_created_tables and not args.use_existing_tables: for conn_index, c in enumerate(all_connections): for q in drop_queries: c.execute(q) - print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') + print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}") - reportStageEnd('drop-2') + reportStageEnd("drop-2") diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 4cff6b41949..0cb8481ee6e 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -12,9 +12,13 @@ import pprint import sys import traceback -parser = argparse.ArgumentParser(description='Create performance test report') -parser.add_argument('--report', default='main', choices=['main', 'all-queries'], - help='Which report to build') +parser = argparse.ArgumentParser(description="Create performance test report") +parser.add_argument( + "--report", + default="main", + choices=["main", "all-queries"], + help="Which report to build", +) args = parser.parse_args() tables = [] @@ -31,8 +35,8 @@ unstable_partial_queries = 0 # max seconds to run one query by itself, not counting preparation allowed_single_run_time = 2 -color_bad='#ffb0c0' -color_good='#b0d050' +color_bad = "#ffb0c0" +color_good = "#b0d050" header_template = """ @@ -151,24 +155,29 @@ tr:nth-child(odd) td {{filter: brightness(90%);}} table_anchor = 0 row_anchor = 0 + def currentTableAnchor(): global table_anchor - return f'{table_anchor}' + return f"{table_anchor}" + def newTableAnchor(): global table_anchor table_anchor += 1 return currentTableAnchor() + def currentRowAnchor(): global row_anchor global table_anchor - return f'{table_anchor}.{row_anchor}' + return f"{table_anchor}.{row_anchor}" + def nextRowAnchor(): global row_anchor global table_anchor - return f'{table_anchor}.{row_anchor + 1}' + return f"{table_anchor}.{row_anchor + 1}" + def advanceRowAnchor(): global row_anchor @@ -178,43 +187,58 @@ def advanceRowAnchor(): def tr(x, anchor=None): - #return '{x}'.format(a=a, x=str(x)) + # return '{x}'.format(a=a, x=str(x)) anchor = anchor if anchor else advanceRowAnchor() - return f'{x}' + return f"{x}" -def td(value, cell_attributes = ''): - return '{value}'.format( - cell_attributes = cell_attributes, - value = value) -def th(value, cell_attributes = ''): - return '{value}'.format( - cell_attributes = cell_attributes, - value = value) +def td(value, cell_attributes=""): + return "{value}".format( + cell_attributes=cell_attributes, value=value + ) -def tableRow(cell_values, cell_attributes = [], anchor=None): + +def th(value, cell_attributes=""): + return "{value}".format( + cell_attributes=cell_attributes, value=value + ) + + +def tableRow(cell_values, cell_attributes=[], anchor=None): return tr( - ''.join([td(v, a) - for v, a in itertools.zip_longest( - cell_values, cell_attributes, - fillvalue = '') - if a is not None and v is not None]), - anchor) + "".join( + [ + td(v, a) + for v, a in itertools.zip_longest( + cell_values, cell_attributes, fillvalue="" + ) + if a is not None and v is 
not None + ] + ), + anchor, + ) -def tableHeader(cell_values, cell_attributes = []): + +def tableHeader(cell_values, cell_attributes=[]): return tr( - ''.join([th(v, a) - for v, a in itertools.zip_longest( - cell_values, cell_attributes, - fillvalue = '') - if a is not None and v is not None])) + "".join( + [ + th(v, a) + for v, a in itertools.zip_longest( + cell_values, cell_attributes, fillvalue="" + ) + if a is not None and v is not None + ] + ) + ) + def tableStart(title): - cls = '-'.join(title.lower().split(' ')[:3]); + cls = "-".join(title.lower().split(" ")[:3]) global table_anchor table_anchor = cls anchor = currentTableAnchor() - help_anchor = '-'.join(title.lower().split(' ')); + help_anchor = "-".join(title.lower().split(" ")) return f"""

{title} @@ -223,12 +247,14 @@ def tableStart(title): """ + def tableEnd(): - return '
' + return "" + def tsvRows(n): try: - with open(n, encoding='utf-8') as fd: + with open(n, encoding="utf-8") as fd: result = [] for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE): new_row = [] @@ -237,27 +263,32 @@ def tsvRows(n): # The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so # 'Чем зÐ�нимаеÑ�ЬÑ�Ñ�' is transformed back into 'Чем зАнимаешЬся'. - new_row.append(e.encode('utf-8').decode('unicode-escape').encode('latin1').decode('utf-8')) + new_row.append( + e.encode("utf-8") + .decode("unicode-escape") + .encode("latin1") + .decode("utf-8") + ) result.append(new_row) return result except: - report_errors.append( - traceback.format_exception_only( - *sys.exc_info()[:2])[-1]) + report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1]) pass return [] + def htmlRows(n): rawRows = tsvRows(n) - result = '' + result = "" for row in rawRows: result += tableRow(row) return result + def addSimpleTable(caption, columns, rows, pos=None): global tables - text = '' + text = "" if not rows: return @@ -268,51 +299,63 @@ def addSimpleTable(caption, columns, rows, pos=None): text += tableEnd() tables.insert(pos if pos else len(tables), text) + def add_tested_commits(): global report_errors try: - addSimpleTable('Tested Commits', ['Old', 'New'], - [['
<pre>{}</pre>
'.format(x) for x in - [open('left-commit.txt').read(), - open('right-commit.txt').read()]]]) + addSimpleTable( + "Tested Commits", + ["Old", "New"], + [ + [ + "
<pre>{}</pre>
".format(x) + for x in [ + open("left-commit.txt").read(), + open("right-commit.txt").read(), + ] + ] + ], + ) except: # Don't fail if no commit info -- maybe it's a manual run. - report_errors.append( - traceback.format_exception_only( - *sys.exc_info()[:2])[-1]) + report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1]) pass + def add_report_errors(): global tables global report_errors # Add the errors reported by various steps of comparison script try: - report_errors += [l.strip() for l in open('report/errors.log')] + report_errors += [l.strip() for l in open("report/errors.log")] except: - report_errors.append( - traceback.format_exception_only( - *sys.exc_info()[:2])[-1]) + report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1]) pass if not report_errors: return - text = tableStart('Errors while Building the Report') - text += tableHeader(['Error']) + text = tableStart("Errors while Building the Report") + text += tableHeader(["Error"]) for x in report_errors: text += tableRow([x]) text += tableEnd() # Insert after Tested Commits tables.insert(1, text) - errors_explained.append([f'There were some errors while building the report']); + errors_explained.append( + [ + f'There were some errors while building the report' + ] + ) + def add_errors_explained(): if not errors_explained: return text = '' - text += tableStart('Error Summary') - text += tableHeader(['Description']) + text += tableStart("Error Summary") + text += tableHeader(["Description"]) for row in errors_explained: text += tableRow(row) text += tableEnd() @@ -321,59 +364,81 @@ def add_errors_explained(): tables.insert(1, text) -if args.report == 'main': +if args.report == "main": print((header_template.format())) add_tested_commits() - - run_error_rows = tsvRows('run-errors.tsv') + run_error_rows = tsvRows("run-errors.tsv") error_tests += len(run_error_rows) - addSimpleTable('Run Errors', ['Test', 'Error'], run_error_rows) + addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows) if run_error_rows: - errors_explained.append([f'There were some errors while running the tests']); + errors_explained.append( + [ + f'There were some errors while running the tests' + ] + ) - - slow_on_client_rows = tsvRows('report/slow-on-client.tsv') + slow_on_client_rows = tsvRows("report/slow-on-client.tsv") error_tests += len(slow_on_client_rows) - addSimpleTable('Slow on Client', - ['Client time, s', 'Server time, s', 'Ratio', 'Test', 'Query'], - slow_on_client_rows) + addSimpleTable( + "Slow on Client", + ["Client time, s", "Server time, s", "Ratio", "Test", "Query"], + slow_on_client_rows, + ) if slow_on_client_rows: - errors_explained.append([f'Some queries are taking noticeable time client-side (missing `FORMAT Null`?)']); + errors_explained.append( + [ + f'Some queries are taking noticeable time client-side (missing `FORMAT Null`?)' + ] + ) - unmarked_short_rows = tsvRows('report/unexpected-query-duration.tsv') + unmarked_short_rows = tsvRows("report/unexpected-query-duration.tsv") error_tests += len(unmarked_short_rows) - addSimpleTable('Unexpected Query Duration', - ['Problem', 'Marked as "short"?', 'Run time, s', 'Test', '#', 'Query'], - unmarked_short_rows) + addSimpleTable( + "Unexpected Query Duration", + ["Problem", 'Marked as "short"?', "Run time, s", "Test", "#", "Query"], + unmarked_short_rows, + ) if unmarked_short_rows: - errors_explained.append([f'Some queries have unexpected duration']); + errors_explained.append( + [ + f'Some queries have unexpected duration' + ] + ) def 
add_partial(): - rows = tsvRows('report/partial-queries-report.tsv') + rows = tsvRows("report/partial-queries-report.tsv") if not rows: return global unstable_partial_queries, slow_average_tests, tables - text = tableStart('Partial Queries') - columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query'] + text = tableStart("Partial Queries") + columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"] text += tableHeader(columns) - attrs = ['' for c in columns] + attrs = ["" for c in columns] for row in rows: - anchor = f'{currentTableAnchor()}.{row[2]}.{row[3]}' + anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}" if float(row[1]) > 0.10: attrs[1] = f'style="background: {color_bad}"' unstable_partial_queries += 1 - errors_explained.append([f'The query no. {row[3]} of test \'{row[2]}\' has excessive variance of run time. Keep it below 10%']) + errors_explained.append( + [ + f"The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%" + ] + ) else: - attrs[1] = '' + attrs[1] = "" if float(row[0]) > allowed_single_run_time: attrs[0] = f'style="background: {color_bad}"' - errors_explained.append([f'The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"']) + errors_explained.append( + [ + f'The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"' + ] + ) slow_average_tests += 1 else: - attrs[0] = '' + attrs[0] = "" text += tableRow(row, attrs, anchor) text += tableEnd() tables.append(text) @@ -381,41 +446,45 @@ if args.report == 'main': add_partial() def add_changes(): - rows = tsvRows('report/changed-perf.tsv') + rows = tsvRows("report/changed-perf.tsv") if not rows: return global faster_queries, slower_queries, tables - text = tableStart('Changes in Performance') + text = tableStart("Changes in Performance") columns = [ - 'Old, s', # 0 - 'New, s', # 1 - 'Ratio of speedup (-) or slowdown (+)', # 2 - 'Relative difference (new − old) / old', # 3 - 'p < 0.01 threshold', # 4 - '', # Failed # 5 - 'Test', # 6 - '#', # 7 - 'Query', # 8 - ] - attrs = ['' for c in columns] + "Old, s", # 0 + "New, s", # 1 + "Ratio of speedup (-) or slowdown (+)", # 2 + "Relative difference (new − old) / old", # 3 + "p < 0.01 threshold", # 4 + "", # Failed # 5 + "Test", # 6 + "#", # 7 + "Query", # 8 + ] + attrs = ["" for c in columns] attrs[5] = None text += tableHeader(columns, attrs) for row in rows: - anchor = f'{currentTableAnchor()}.{row[6]}.{row[7]}' + anchor = f"{currentTableAnchor()}.{row[6]}.{row[7]}" if int(row[5]): - if float(row[3]) < 0.: + if float(row[3]) < 0.0: faster_queries += 1 attrs[2] = attrs[3] = f'style="background: {color_good}"' else: slower_queries += 1 attrs[2] = attrs[3] = f'style="background: {color_bad}"' - errors_explained.append([f'The query no. {row[7]} of test \'{row[6]}\' has slowed down']) + errors_explained.append( + [ + f"The query no. 
{row[7]} of test '{row[6]}' has slowed down" + ] + ) else: - attrs[2] = attrs[3] = '' + attrs[2] = attrs[3] = "" text += tableRow(row, attrs, anchor) @@ -427,35 +496,35 @@ if args.report == 'main': def add_unstable_queries(): global unstable_queries, very_unstable_queries, tables - unstable_rows = tsvRows('report/unstable-queries.tsv') + unstable_rows = tsvRows("report/unstable-queries.tsv") if not unstable_rows: return unstable_queries += len(unstable_rows) columns = [ - 'Old, s', #0 - 'New, s', #1 - 'Relative difference (new - old)/old', #2 - 'p < 0.01 threshold', #3 - '', # Failed #4 - 'Test', #5 - '#', #6 - 'Query' #7 + "Old, s", # 0 + "New, s", # 1 + "Relative difference (new - old)/old", # 2 + "p < 0.01 threshold", # 3 + "", # Failed #4 + "Test", # 5 + "#", # 6 + "Query", # 7 ] - attrs = ['' for c in columns] + attrs = ["" for c in columns] attrs[4] = None - text = tableStart('Unstable Queries') + text = tableStart("Unstable Queries") text += tableHeader(columns, attrs) for r in unstable_rows: - anchor = f'{currentTableAnchor()}.{r[5]}.{r[6]}' + anchor = f"{currentTableAnchor()}.{r[5]}.{r[6]}" if int(r[4]): very_unstable_queries += 1 attrs[3] = f'style="background: {color_bad}"' else: - attrs[3] = '' + attrs[3] = "" # Just don't add the slightly unstable queries we don't consider # errors. It's not clear what the user should do with them. continue @@ -470,53 +539,70 @@ if args.report == 'main': add_unstable_queries() - skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv') - addSimpleTable('Skipped Tests', ['Test', 'Reason'], skipped_tests_rows) + skipped_tests_rows = tsvRows("analyze/skipped-tests.tsv") + addSimpleTable("Skipped Tests", ["Test", "Reason"], skipped_tests_rows) - addSimpleTable('Test Performance Changes', - ['Test', 'Ratio of speedup (-) or slowdown (+)', 'Queries', 'Total not OK', 'Changed perf', 'Unstable'], - tsvRows('report/test-perf-changes.tsv')) + addSimpleTable( + "Test Performance Changes", + [ + "Test", + "Ratio of speedup (-) or slowdown (+)", + "Queries", + "Total not OK", + "Changed perf", + "Unstable", + ], + tsvRows("report/test-perf-changes.tsv"), + ) def add_test_times(): global slow_average_tests, tables - rows = tsvRows('report/test-times.tsv') + rows = tsvRows("report/test-times.tsv") if not rows: return columns = [ - 'Test', #0 - 'Wall clock time, entire test, s', #1 - 'Total client time for measured query runs, s', #2 - 'Queries', #3 - 'Longest query, total for measured runs, s', #4 - 'Wall clock time per query, s', #5 - 'Shortest query, total for measured runs, s', #6 - '', # Runs #7 - ] - attrs = ['' for c in columns] + "Test", # 0 + "Wall clock time, entire test, s", # 1 + "Total client time for measured query runs, s", # 2 + "Queries", # 3 + "Longest query, total for measured runs, s", # 4 + "Wall clock time per query, s", # 5 + "Shortest query, total for measured runs, s", # 6 + "", # Runs #7 + ] + attrs = ["" for c in columns] attrs[7] = None - text = tableStart('Test Times') + text = tableStart("Test Times") text += tableHeader(columns, attrs) - allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs + allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs for r in rows: - anchor = f'{currentTableAnchor()}.{r[0]}' + anchor = f"{currentTableAnchor()}.{r[0]}" total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers - if r[0] != 'Total' and float(r[5]) > allowed_average_run_time * total_runs: + if r[0] != "Total" and float(r[5]) > allowed_average_run_time * total_runs: # FIXME should be 15s max 
-- investigate parallel_insert slow_average_tests += 1 attrs[5] = f'style="background: {color_bad}"' - errors_explained.append([f'The test \'{r[0]}\' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up']) + errors_explained.append( + [ + f"The test '{r[0]}' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up" + ] + ) else: - attrs[5] = '' + attrs[5] = "" - if r[0] != 'Total' and float(r[4]) > allowed_single_run_time * total_runs: + if r[0] != "Total" and float(r[4]) > allowed_single_run_time * total_runs: slow_average_tests += 1 attrs[4] = f'style="background: {color_bad}"' - errors_explained.append([f'Some query of the test \'{r[0]}\' is too slow to run. See the all queries report']) + errors_explained.append( + [ + f"Some query of the test '{r[0]}' is too slow to run. See the all queries report" + ] + ) else: - attrs[4] = '' + attrs[4] = "" text += tableRow(r, attrs, anchor) @@ -525,10 +611,17 @@ if args.report == 'main': add_test_times() - addSimpleTable('Metric Changes', - ['Metric', 'Old median value', 'New median value', - 'Relative difference', 'Times difference'], - tsvRows('metrics/changes.tsv')) + addSimpleTable( + "Metric Changes", + [ + "Metric", + "Old median value", + "New median value", + "Relative difference", + "Times difference", + ], + tsvRows("metrics/changes.tsv"), + ) add_report_errors() add_errors_explained() @@ -536,7 +629,8 @@ if args.report == 'main': for t in tables: print(t) - print(f""" + print( + f""" - """) + """ + ) - status = 'success' - message = 'See the report' + status = "success" + message = "See the report" message_array = [] if slow_average_tests: - status = 'failure' - message_array.append(str(slow_average_tests) + ' too long') + status = "failure" + message_array.append(str(slow_average_tests) + " too long") if faster_queries: - message_array.append(str(faster_queries) + ' faster') + message_array.append(str(faster_queries) + " faster") if slower_queries: if slower_queries > 3: - status = 'failure' - message_array.append(str(slower_queries) + ' slower') + status = "failure" + message_array.append(str(slower_queries) + " slower") if unstable_partial_queries: very_unstable_queries += unstable_partial_queries - status = 'failure' + status = "failure" # Don't show mildly unstable queries, only the very unstable ones we # treat as errors. if very_unstable_queries: if very_unstable_queries > 5: error_tests += very_unstable_queries - status = 'failure' - message_array.append(str(very_unstable_queries) + ' unstable') + status = "failure" + message_array.append(str(very_unstable_queries) + " unstable") error_tests += slow_average_tests if error_tests: - status = 'failure' - message_array.insert(0, str(error_tests) + ' errors') + status = "failure" + message_array.insert(0, str(error_tests) + " errors") if message_array: - message = ', '.join(message_array) + message = ", ".join(message_array) if report_errors: - status = 'failure' - message = 'Errors while building the report.' + status = "failure" + message = "Errors while building the report." 
- print((""" + print( + ( + """ - """.format(status=status, message=message))) + """.format( + status=status, message=message + ) + ) + ) -elif args.report == 'all-queries': +elif args.report == "all-queries": print((header_template.format())) add_tested_commits() def add_all_queries(): - rows = tsvRows('report/all-queries.tsv') + rows = tsvRows("report/all-queries.tsv") if not rows: return columns = [ - '', # Changed #0 - '', # Unstable #1 - 'Old, s', #2 - 'New, s', #3 - 'Ratio of speedup (-) or slowdown (+)', #4 - 'Relative difference (new − old) / old', #5 - 'p < 0.01 threshold', #6 - 'Test', #7 - '#', #8 - 'Query', #9 - ] - attrs = ['' for c in columns] + "", # Changed #0 + "", # Unstable #1 + "Old, s", # 2 + "New, s", # 3 + "Ratio of speedup (-) or slowdown (+)", # 4 + "Relative difference (new − old) / old", # 5 + "p < 0.01 threshold", # 6 + "Test", # 7 + "#", # 8 + "Query", # 9 + ] + attrs = ["" for c in columns] attrs[0] = None attrs[1] = None - text = tableStart('All Query Times') + text = tableStart("All Query Times") text += tableHeader(columns, attrs) for r in rows: - anchor = f'{currentTableAnchor()}.{r[7]}.{r[8]}' + anchor = f"{currentTableAnchor()}.{r[7]}.{r[8]}" if int(r[1]): attrs[6] = f'style="background: {color_bad}"' else: - attrs[6] = '' + attrs[6] = "" if int(r[0]): - if float(r[5]) > 0.: + if float(r[5]) > 0.0: attrs[4] = attrs[5] = f'style="background: {color_bad}"' else: attrs[4] = attrs[5] = f'style="background: {color_good}"' else: - attrs[4] = attrs[5] = '' + attrs[4] = attrs[5] = "" if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time: attrs[2] = f'style="background: {color_bad}"' attrs[3] = f'style="background: {color_bad}"' else: - attrs[2] = '' - attrs[3] = '' + attrs[2] = "" + attrs[3] = "" text += tableRow(r, attrs, anchor) @@ -655,7 +756,8 @@ elif args.report == 'all-queries': for t in tables: print(t) - print(f""" + print( + f""" - """) + """ + ) diff --git a/docker/test/split_build_smoke_test/process_split_build_smoke_test_result.py b/docker/test/split_build_smoke_test/process_split_build_smoke_test_result.py index 58d6ba8c62a..b5bc82e6818 100755 --- a/docker/test/split_build_smoke_test/process_split_build_smoke_test_result.py +++ b/docker/test/split_build_smoke_test/process_split_build_smoke_test_result.py @@ -7,18 +7,19 @@ import csv RESULT_LOG_NAME = "run.log" + def process_result(result_folder): status = "success" - description = 'Server started and responded' + description = "Server started and responded" summary = [("Smoke test", "OK")] - with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log: - lines = run_log.read().split('\n') - if not lines or lines[0].strip() != 'OK': + with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log: + lines = run_log.read().split("\n") + if not lines or lines[0].strip() != "OK": status = "failure" - logging.info("Lines is not ok: %s", str('\n'.join(lines))) + logging.info("Lines is not ok: %s", str("\n".join(lines))) summary = [("Smoke test", "FAIL")] - description = 'Server failed to respond, see result in logs' + description = "Server failed to respond, see result in logs" result_logs = [] server_log_path = os.path.join(result_folder, "clickhouse-server.log") @@ -38,20 +39,22 @@ def process_result(result_folder): def write_results(results_file, status_file, results, status): - with open(results_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(results_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, 'w') 
as f: - out = csv.writer(f, delimiter='\t') + with open(status_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerow(status) if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of split build smoke test") - parser.add_argument("--in-results-dir", default='/test_output/') - parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') - parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + parser = argparse.ArgumentParser( + description="ClickHouse script for parsing results of split build smoke test" + ) + parser.add_argument("--in-results-dir", default="/test_output/") + parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") + parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") args = parser.parse_args() state, description, test_results, logs = process_result(args.in_results_dir) diff --git a/docker/test/sqlancer/process_sqlancer_result.py b/docker/test/sqlancer/process_sqlancer_result.py index ede3cabc1c5..37b8f465498 100755 --- a/docker/test/sqlancer/process_sqlancer_result.py +++ b/docker/test/sqlancer/process_sqlancer_result.py @@ -10,11 +10,18 @@ def process_result(result_folder): status = "success" summary = [] paths = [] - tests = ["TLPWhere", "TLPGroupBy", "TLPHaving", "TLPWhereGroupBy", "TLPDistinct", "TLPAggregate"] + tests = [ + "TLPWhere", + "TLPGroupBy", + "TLPHaving", + "TLPWhereGroupBy", + "TLPDistinct", + "TLPAggregate", + ] for test in tests: - err_path = '{}/{}.err'.format(result_folder, test) - out_path = '{}/{}.out'.format(result_folder, test) + err_path = "{}/{}.err".format(result_folder, test) + out_path = "{}/{}.out".format(result_folder, test) if not os.path.exists(err_path): logging.info("No output err on path %s", err_path) summary.append((test, "SKIPPED")) @@ -23,24 +30,24 @@ def process_result(result_folder): else: paths.append(err_path) paths.append(out_path) - with open(err_path, 'r') as f: - if 'AssertionError' in f.read(): + with open(err_path, "r") as f: + if "AssertionError" in f.read(): summary.append((test, "FAIL")) - status = 'failure' + status = "failure" else: summary.append((test, "OK")) - logs_path = '{}/logs.tar.gz'.format(result_folder) + logs_path = "{}/logs.tar.gz".format(result_folder) if not os.path.exists(logs_path): logging.info("No logs tar on path %s", logs_path) else: paths.append(logs_path) - stdout_path = '{}/stdout.log'.format(result_folder) + stdout_path = "{}/stdout.log".format(result_folder) if not os.path.exists(stdout_path): logging.info("No stdout log on path %s", stdout_path) else: paths.append(stdout_path) - stderr_path = '{}/stderr.log'.format(result_folder) + stderr_path = "{}/stderr.log".format(result_folder) if not os.path.exists(stderr_path): logging.info("No stderr log on path %s", stderr_path) else: @@ -52,20 +59,22 @@ def process_result(result_folder): def write_results(results_file, status_file, results, status): - with open(results_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(results_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(status_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerow(status) if __name__ == "__main__": - 
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of sqlancer test") - parser.add_argument("--in-results-dir", default='/test_output/') - parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') - parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + parser = argparse.ArgumentParser( + description="ClickHouse script for parsing results of sqlancer test" + ) + parser.add_argument("--in-results-dir", default="/test_output/") + parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") + parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") args = parser.parse_args() state, description, test_results, logs = process_result(args.in_results_dir) diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 93e7cebb857..543cf113b2b 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -11,7 +11,7 @@ RUN apt-get update -y \ COPY s3downloader /s3downloader -ENV S3_URL="https://clickhouse-datasets.s3.yandex.net" +ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com" ENV DATASETS="hits visits" ENV EXPORT_S3_STORAGE_POLICIES=1 diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 77dc61e6cd0..6aa9d88f5b4 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -96,7 +96,7 @@ else clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, 
ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits" + clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" fi clickhouse-client --query "SHOW TABLES FROM test" @@ -115,7 +115,7 @@ function run_tests() fi set +e - clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \ + clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \ --skip 00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" \ "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/docker/test/stateful/s3downloader b/docker/test/stateful/s3downloader index eb3b3cd9faf..b1302877d6a 100755 --- a/docker/test/stateful/s3downloader +++ b/docker/test/stateful/s3downloader @@ -10,7 +10,7 @@ import requests import tempfile -DEFAULT_URL = 'https://clickhouse-datasets.s3.yandex.net' +DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com' AVAILABLE_DATASETS = { 'hits': 'hits_v1.tar', diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 5fd78502337..63750b90b5a 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -131,8 +131,23 @@ clickhouse-client -q "system flush logs" ||: grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz & -clickhouse-client -q "select * from system.query_log format TSVWithNamesAndTypes" | pigz > /test_output/query-log.tsv.gz & -clickhouse-client -q "select * from system.query_thread_log format TSVWithNamesAndTypes" | pigz > /test_output/query-thread-log.tsv.gz & + +# Compress tables. 
+# +# NOTE: +# - that due to tests with s3 storage we cannot use /var/lib/clickhouse/data +# directly +# - even though ci auto-compresses some files (but not *.tsv) it does this only +# for files >64MB, we want these files to be compressed explicitly +for table in query_log zookeeper_log trace_log transactions_info_log +do + clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz & + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.1.tsv.gz & + clickhouse-client --port 29000 -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.2.tsv.gz & + fi +done +wait ||: # Also export trace log in flamegraph-friendly format. for trace_type in CPU Memory Real @@ -161,14 +176,6 @@ fi tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: -# Replace the engine with Ordinary to avoid extra symlinks stuff in artifacts. -# (so that clickhouse-local --path can read it w/o extra care). -sed -i -e "s/ATTACH DATABASE _ UUID '[^']*'/ATTACH DATABASE system/" -e "s/Atomic/Ordinary/" /var/lib/clickhouse/metadata/system.sql -for table in text_log query_log zookeeper_log trace_log; do - sed -i "s/ATTACH TABLE _ UUID '[^']*'/ATTACH TABLE $table/" /var/lib/clickhouse/metadata/system/${table}.sql - tar -chf /test_output/${table}_dump.tar /var/lib/clickhouse/metadata/system.sql /var/lib/clickhouse/metadata/system/${table}.sql /var/lib/clickhouse/data/system/${table} ||: -done - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server1.log ||: grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server2.log ||: @@ -179,8 +186,6 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] rm /var/log/clickhouse-server/clickhouse-server2.log mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: mv /var/log/clickhouse-server/stderr2.log /test_output/ ||: - tar -chf /test_output/zookeeper_log_dump1.tar /var/lib/clickhouse1/data/system/zookeeper_log ||: - tar -chf /test_output/zookeeper_log_dump2.tar /var/lib/clickhouse2/data/system/zookeeper_log ||: tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||: fi diff --git a/docker/test/stateless/setup_minio.sh b/docker/test/stateless/setup_minio.sh index df27b21b05b..aa2aedefad8 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/docker/test/stateless/setup_minio.sh @@ -41,6 +41,7 @@ sleep 5 ./mc admin user add clickminio test testtest ./mc admin policy set clickminio readwrite user=test ./mc mb clickminio/test +./mc policy set public clickminio/test # Upload data to Minio. 
By default after unpacking all tests will in diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 1f39202e743..ba6daffc014 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -25,10 +25,11 @@ RUN apt-get update -y \ brotli COPY ./stress /stress +COPY ./download_previous_release /download_previous_release COPY run.sh / ENV DATASETS="hits visits" -ENV S3_URL="https://clickhouse-datasets.s3.yandex.net" +ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com" ENV EXPORT_S3_STORAGE_POLICIES=1 CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stress/download_previous_release b/docker/test/stress/download_previous_release new file mode 100755 index 00000000000..ea3d376ad90 --- /dev/null +++ b/docker/test/stress/download_previous_release @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 + +import requests +import re +import os + +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry + +CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" + +CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb" +CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static-dbg_{version}_amd64.deb" +CLICKHOUSE_SERVER_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-server_{version}_all.deb" +CLICKHOUSE_CLIENT_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-client_{version}_amd64.deb" + + +CLICKHOUSE_COMMON_STATIC_PACKET_NAME = "clickhouse-common-static_{version}_amd64.deb" +CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = "clickhouse-common-static-dbg_{version}_amd64.deb" +CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_all.deb" +CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_all.deb" + +PACKETS_DIR = "previous_release_package_folder/" +VERSION_PATTERN = r"((?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" + + +class Version: + def __init__(self, version): + self.version = version + + def __lt__(self, other): + return list(map(int, self.version.split('.'))) < list(map(int, other.version.split('.'))) + + def __str__(self): + return self.version + + +class ReleaseInfo: + def __init__(self, version, release_type): + self.version = version + self.type = release_type + + +def find_previous_release(server_version, releases): + releases.sort(key=lambda x: x.version, reverse=True) + for release in releases: + if release.version < server_version: + return True, release + + return False, None + + +def get_previous_release(server_version): + page = 1 + found = False + while not found: + response = requests.get(CLICKHOUSE_TAGS_URL, {'page': page, 'per_page': 100}) + if not response.ok: + raise Exception('Cannot load the list of tags from github: ' + response.reason) + + releases_str = set(re.findall(VERSION_PATTERN, response.text)) + if len(releases_str) == 0: + raise Exception('Cannot find previous release for ' + str(server_version) + ' server version') + + releases = list(map(lambda x: ReleaseInfo(Version(x.split('-')[0]), x.split('-')[1]), releases_str)) + found, previous_release = find_previous_release(server_version, releases) + page += 1 + + return previous_release + + +def download_packet(url, local_file_name, retries=10, backoff_factor=0.3): + session = requests.Session() + 
retry = Retry( + total=retries, + read=retries, + connect=retries, + backoff_factor=backoff_factor, + ) + adapter = HTTPAdapter(max_retries=retry) + session.mount('http://', adapter) + session.mount('https://', adapter) + response = session.get(url) + print(url) + if response.ok: + open(PACKETS_DIR + local_file_name, 'wb').write(response.content) + + +def download_packets(release): + if not os.path.exists(PACKETS_DIR): + os.makedirs(PACKETS_DIR) + + download_packet(CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL.format(version=release.version, type=release.type), + CLICKHOUSE_COMMON_STATIC_PACKET_NAME.format(version=release.version)) + + download_packet(CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL.format(version=release.version, type=release.type), + CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME.format(version=release.version)) + + download_packet(CLICKHOUSE_SERVER_DOWNLOAD_URL.format(version=release.version, type=release.type), + CLICKHOUSE_SERVER_PACKET_NAME.format(version=release.version)) + + download_packet(CLICKHOUSE_CLIENT_DOWNLOAD_URL.format(version=release.version, type=release.type), + CLICKHOUSE_CLIENT_PACKET_NAME.format(version=release.version)) + + +if __name__ == '__main__': + server_version = Version(input()) + previous_release = get_previous_release(server_version) + download_packets(previous_release) + diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 65c5fb9e40f..0f5139f5b4d 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -22,15 +22,19 @@ export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 -dpkg -i package_folder/clickhouse-common-static_*.deb -dpkg -i package_folder/clickhouse-common-static-dbg_*.deb -dpkg -i package_folder/clickhouse-server_*.deb -dpkg -i package_folder/clickhouse-client_*.deb +function install_packages() +{ + dpkg -i $1/clickhouse-common-static_*.deb + dpkg -i $1/clickhouse-common-static-dbg_*.deb + dpkg -i $1/clickhouse-server_*.deb + dpkg -i $1/clickhouse-client_*.deb +} function configure() { @@ -102,21 +106,10 @@ function stop() function start() { - # Rename existing log file - it will be more convenient to read separate files for separate server runs. 
- if [ -f '/var/log/clickhouse-server/clickhouse-server.log' ] - then - log_file_counter=1 - while [ -f "/var/log/clickhouse-server/clickhouse-server.log.${log_file_counter}" ] - do - log_file_counter=$((log_file_counter + 1)) - done - mv '/var/log/clickhouse-server/clickhouse-server.log' "/var/log/clickhouse-server/clickhouse-server.log.${log_file_counter}" - fi - counter=0 until clickhouse-client --query "SELECT 1" do - if [ "$counter" -gt 240 ] + if [ "$counter" -gt ${1:-240} ] then echo "Cannot start clickhouse-server" cat /var/log/clickhouse-server/stdout.log @@ -171,6 +164,8 @@ quit time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||: } +install_packages package_folder + configure ./setup_minio.sh @@ -184,6 +179,8 @@ clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordin clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test" stop +mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log + start clickhouse-client --query "SHOW TABLES FROM datasets" @@ -199,17 +196,17 @@ clickhouse-client --query "SHOW TABLES FROM test" || echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv stop +mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log + start clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \ - || echo -e 'Server failed to start\tFAIL' >> /test_output/test_results.tsv + || (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + && grep -Fa ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt) [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL" -# Print Fatal log messages to stdout -zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* - # Grep logs for sanitizer asserts, crashes and other critical errors # Sanitizer asserts @@ -226,26 +223,158 @@ zgrep -Fa " Application: Child process was terminated by signal 9" /var/ || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv # Logical errors -zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ - && echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ +zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /test_output/logical_errors.txt \ + && echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv +# Remove file logical_errors.txt if it's empty +[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt + # Crash zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv # It also checks for crash without stacktrace (printed by watchdog) -zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ - && echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> 
/test_output/test_results.tsv \ +zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* > /test_output/fatal_messages.txt \ + && echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv +# Remove file fatal_messages.txt if it's empty +[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt + zgrep -Fa "########################################" /test_output/* > /dev/null \ && echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \ && echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv +echo -e "Backward compatibility check\n" + +echo "Download previous release server" +mkdir previous_release_package_folder +clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \ + || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv + +stop +mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log + +if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] +then + echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/test_results.tsv + + # Uninstall current packages + dpkg --remove clickhouse-client + dpkg --remove clickhouse-server + dpkg --remove clickhouse-common-static-dbg + dpkg --remove clickhouse-common-static + + rm -rf /var/lib/clickhouse/* + + # Install previous release packages + install_packages previous_release_package_folder + + # Start server from previous release + configure + start + + clickhouse-client --query="SELECT 'Server version: ', version()" + + # Install new package before running stress test because we should use new clickhouse-client and new clickhouse-test + install_packages package_folder + + mkdir tmp_stress_output + + ./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \ + && echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv + rm -rf tmp_stress_output + + clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" + + stop + mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log + + # Start new server + configure + start 500 + clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \ + || (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \ + && grep -Fa ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt) + + clickhouse-client --query="SELECT 'Server version: ', version()" + + # Let the server run for a while before checking log. + sleep 60 + + stop + mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.clean.log + + # Error messages (we should ignore some errors) + echo "Check for Error messages in server log:" + zgrep -Fav -e "Code: 236. 
DB::Exception: Cancelled merging parts" \ + -e "Code: 236. DB::Exception: Cancelled mutating parts" \ + -e "REPLICA_IS_ALREADY_ACTIVE" \ + -e "REPLICA_IS_ALREADY_EXIST" \ + -e "ALL_REPLICAS_LOST" \ + -e "DDLWorker: Cannot parse DDL task query" \ + -e "RaftInstance: failed to accept a rpc connection due to error 125" \ + -e "UNKNOWN_DATABASE" \ + -e "NETWORK_ERROR" \ + -e "UNKNOWN_TABLE" \ + -e "ZooKeeperClient" \ + -e "KEEPER_EXCEPTION" \ + -e "DirectoryMonitor" \ + -e "TABLE_IS_READ_ONLY" \ + -e "Code: 1000, e.code() = 111, Connection refused" \ + -e "UNFINISHED" \ + -e "Renaming unexpected part" \ + /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ + && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_error_messages.txt if it's empty + [ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt + + # Sanitizer asserts + zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp + zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp + zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \ + && echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv + rm -f /test_output/tmp + + # OOM + zgrep -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ + && echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Logical errors + echo "Check for Logical errors in server log:" + zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \ + && echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_logical_errors.txt if it's empty + [ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt + + # Crash + zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ + && echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv + + # It also checks for crash without stacktrace (printed by watchdog) + echo "Check for Fatal message in server log:" + zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \ + && echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> 
/test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_fatal_messages.txt if it's empty + [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt +else + echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv +fi + # Put logs into /test_output/ for log_file in /var/log/clickhouse-server/clickhouse-server.log* do diff --git a/docker/test/stress/stress b/docker/test/stress/stress index c89c5ff5e27..d78de84f60d 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -10,7 +10,7 @@ import logging import time -def get_options(i): +def get_options(i, backward_compatibility_check): options = [] client_options = [] if 0 < i: @@ -19,7 +19,7 @@ def get_options(i): if i % 3 == 1: options.append("--db-engine=Ordinary") - if i % 3 == 2: + if i % 3 == 2 and not backward_compatibility_check: options.append('''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i)) client_options.append('allow_experimental_database_replicated=1') @@ -47,7 +47,8 @@ def get_options(i): return ' '.join(options) -def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_time_limit): +def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_time_limit, backward_compatibility_check): + backward_compatibility_check_option = '--backward-compatibility-check' if backward_compatibility_check else '' global_time_limit_option = '' if global_time_limit: global_time_limit_option = "--global_time_limit={}".format(global_time_limit) @@ -56,7 +57,7 @@ def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_t pipes = [] for i in range(0, len(output_paths)): f = open(output_paths[i], 'w') - full_command = "{} {} {} {}".format(cmd, get_options(i), global_time_limit_option, skip_tests_option) + full_command = "{} {} {} {} {}".format(cmd, get_options(i, backward_compatibility_check), global_time_limit_option, skip_tests_option, backward_compatibility_check_option) logging.info("Run func tests '%s'", full_command) p = Popen(full_command, shell=True, stdout=f, stderr=f) pipes.append(p) @@ -82,15 +83,15 @@ def make_query_command(query): def prepare_for_hung_check(drop_databases): # FIXME this function should not exist, but... - # ThreadFuzzer significantly slows down server and causes false-positive hung check failures - call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'") - # We attach gdb to clickhouse-server before running tests # to print stacktraces of all crashes even if clickhouse cannot print it for some reason. # However, it obstruct checking for hung queries. logging.info("Will terminate gdb (if any)") call_with_retry("kill -TERM $(pidof gdb)") + # ThreadFuzzer significantly slows down server and causes false-positive hung check failures + call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'") + call_with_retry(make_query_command('SELECT 1 FORMAT Null')) # Some tests execute SYSTEM STOP MERGES or similar queries. @@ -130,7 +131,7 @@ def prepare_for_hung_check(drop_databases): Popen(command, shell=True) break except Exception as ex: - print("Failed to SHOW or DROP databasese, will retry", ex) + logging.error("Failed to SHOW or DROP databasese, will retry %s", str(ex)) time.sleep(i) else: raise Exception("Cannot drop databases after stress tests. 
Probably server consumed too much memory and cannot execute simple queries") @@ -168,6 +169,7 @@ if __name__ == "__main__": parser.add_argument("--output-folder") parser.add_argument("--global-time-limit", type=int, default=1800) parser.add_argument("--num-parallel", type=int, default=cpu_count()) + parser.add_argument('--backward-compatibility-check', action='store_true') parser.add_argument('--hung-check', action='store_true', default=False) # make sense only for hung check parser.add_argument('--drop-databases', action='store_true', default=False) @@ -176,7 +178,7 @@ if __name__ == "__main__": if args.drop_databases and not args.hung_check: raise Exception("--drop-databases only used in hung check (--hung-check)") func_pipes = [] - func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests, args.global_time_limit) + func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests, args.global_time_limit, args.backward_compatibility_check) logging.info("Will wait functests to finish") while True: @@ -196,7 +198,11 @@ if __name__ == "__main__": logging.info("Logs compressed") if args.hung_check: - have_long_running_queries = prepare_for_hung_check(args.drop_databases) + try: + have_long_running_queries = prepare_for_hung_check(args.drop_databases) + except Exception as ex: + have_long_running_queries = True + logging.error("Failed to prepare for hung check %s", str(ex)) logging.info("Checking if some queries hung") cmd = ' '.join([args.test_cmd, # Do not track memory allocations up to 1Gi, @@ -213,6 +219,8 @@ if __name__ == "__main__": "--client-option", "max_untracked_memory=1Gi", "--client-option", "max_memory_usage_for_user=0", "--client-option", "memory_profiler_step=1Gi", + # Use system database to avoid CREATE/DROP DATABASE queries + "--database=system", "--hung-check", "00001_select_1" ]) diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 85c751edfbe..3101ab84c40 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -16,7 +16,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ python3-pip \ shellcheck \ yamllint \ - && pip3 install codespell PyGithub boto3 unidiff dohq-artifactory + && pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH diff --git a/docker/test/style/process_style_check_result.py b/docker/test/style/process_style_check_result.py index 655b7d70243..6472ff21f5e 100755 --- a/docker/test/style/process_style_check_result.py +++ b/docker/test/style/process_style_check_result.py @@ -14,6 +14,7 @@ def process_result(result_folder): ("header duplicates", "duplicate_output.txt"), ("shellcheck", "shellcheck_output.txt"), ("style", "style_output.txt"), + ("black", "black_output.txt"), ("typos", "typos_output.txt"), ("whitespaces", "whitespaces_output.txt"), ("workflows", "workflows_output.txt"), diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index ce3ea4e50a6..651883511e8 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -7,11 +7,13 @@ echo "Check duplicates" | ts ./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt echo "Check style" | ts ./check-style -n |& tee /test_output/style_output.txt +echo "Check python formatting with black" | ts +./check-black -n |& tee /test_output/black_output.txt echo "Check typos" | ts ./check-typos |& tee /test_output/typos_output.txt 
echo "Check whitespaces" | ts ./check-whitespaces -n |& tee /test_output/whitespaces_output.txt -echo "Check sorkflows" | ts +echo "Check workflows" | ts ./check-workflows |& tee /test_output/workflows_output.txt echo "Check shell scripts with shellcheck" | ts ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt diff --git a/docker/test/test_runner.sh b/docker/test/test_runner.sh deleted file mode 100755 index 0c99c8c2b32..00000000000 --- a/docker/test/test_runner.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/sh - -set -e -x - -# Not sure why shellcheck complains that rc is not assigned before it is referenced. -# shellcheck disable=SC2154 -trap 'rc=$?; echo EXITED WITH: $rc; exit $rc' EXIT - -# CLI option to prevent rebuilding images, just re-run tests with images leftover from previuos time -readonly NO_REBUILD_FLAG="--no-rebuild" - -readonly CLICKHOUSE_DOCKER_DIR="$(realpath "${1}")" -readonly CLICKHOUSE_PACKAGES_ARG="${2}" -CLICKHOUSE_SERVER_IMAGE="${3}" - -if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then - readonly CLICKHOUSE_PACKAGES_DIR="$(realpath "${2}")" # or --no-rebuild -fi - - -# In order to allow packages directory to be anywhere, and to reduce amount of context sent to the docker daemon, -# all images are built in multiple stages: -# 1. build base image, install dependencies -# 2. run image with volume mounted, install what needed from those volumes -# 3. tag container as image -# 4. [optional] build another image atop of tagged. - -# TODO: optionally mount most recent clickhouse-test and queries directory from local machine - -if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then - docker build --network=host \ - -f "${CLICKHOUSE_DOCKER_DIR}/test/stateless/clickhouse-statelest-test-runner.Dockerfile" \ - --target clickhouse-test-runner-base \ - -t clickhouse-test-runner-base:preinstall \ - "${CLICKHOUSE_DOCKER_DIR}/test/stateless" - - docker rm -f clickhouse-test-runner-installing-packages || true - docker run --network=host \ - -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \ - --name clickhouse-test-runner-installing-packages \ - clickhouse-test-runner-base:preinstall - docker commit clickhouse-test-runner-installing-packages clickhouse-statelest-test-runner:local - docker rm -f clickhouse-test-runner-installing-packages || true -fi - -# # Create a bind-volume to the clickhouse-test script file -# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/tests/clickhouse-test --opt o=bind clickhouse-test-script-volume -# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/tests/queries --opt o=bind clickhouse-test-queries-dir-volume - -# Build server image (optional) from local packages -if [ -z "${CLICKHOUSE_SERVER_IMAGE}" ]; then - CLICKHOUSE_SERVER_IMAGE="clickhouse/server:local" - - if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then - docker build --network=host \ - -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ - --target clickhouse-server-base \ - -t clickhouse-server-base:preinstall \ - "${CLICKHOUSE_DOCKER_DIR}/server" - - docker rm -f clickhouse_server_base_installing_server || true - docker run --network=host -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \ - --name clickhouse_server_base_installing_server \ - clickhouse-server-base:preinstall - docker commit clickhouse_server_base_installing_server clickhouse-server-base:postinstall - - docker build --network=host \ - -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ - 
--target clickhouse-server \ - -t "${CLICKHOUSE_SERVER_IMAGE}" \ - "${CLICKHOUSE_DOCKER_DIR}/server" - fi -fi - -docker rm -f test-runner || true -docker-compose down -CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \ - docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \ - create \ - --build --force-recreate - -CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \ - docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \ - run \ - --name test-runner \ - test-runner diff --git a/docker/test/test_runner_docker_compose.yaml b/docker/test/test_runner_docker_compose.yaml deleted file mode 100644 index 2aef6a48d77..00000000000 --- a/docker/test/test_runner_docker_compose.yaml +++ /dev/null @@ -1,34 +0,0 @@ -version: "2" - -services: - clickhouse-server: - image: ${CLICKHOUSE_SERVER_IMAGE} - expose: - - "8123" # HTTP - - "9000" # TCP - - "9009" # HTTP-interserver - restart: "no" - - test-runner: - image: clickhouse-statelest-test-runner:local - - restart: "no" - depends_on: - - clickhouse-server - environment: - # these are used by clickhouse-test to point clickhouse-client to the right server - - CLICKHOUSE_HOST=clickhouse-server - - CLICKHOUSE_PORT=9009 - - CLICKHOUSE_TEST_HOST_EXPOSED_PORT=51234 - expose: - # port for any test to serve data to clickhouse-server on rare occasion (like URL-engine tables in 00646), - # should match value of CLICKHOUSE_TEST_HOST_EXPOSED_PORT above - - "51234" - - # NOTE: Dev-mode: mount newest versions of the queries and clickhouse-test script into container. - # volumes: - # - /home/enmk/proj/ClickHouse_master/tests/queries:/usr/share/clickhouse-test/queries:ro - # - /home/enmk/proj/ClickHouse_master/tests/clickhouse-test:/usr/bin/clickhouse-test:ro - - # String-form instead of list-form to allow multiple arguments in "${CLICKHOUSE_TEST_ARGS}" - entrypoint: "clickhouse-test ${CLICKHOUSE_TEST_ARGS}" diff --git a/docker/test/testflows/runner/process_testflows_result.py b/docker/test/testflows/runner/process_testflows_result.py index 37d0b6a69d1..8bfc4ac0b0f 100755 --- a/docker/test/testflows/runner/process_testflows_result.py +++ b/docker/test/testflows/runner/process_testflows_result.py @@ -22,9 +22,9 @@ def process_result(result_folder): total_other = 0 test_results = [] for test in results["tests"]: - test_name = test['test']['test_name'] - test_result = test['result']['result_type'].upper() - test_time = str(test['result']['message_rtime']) + test_name = test["test"]["test_name"] + test_result = test["result"]["result_type"].upper() + test_time = str(test["result"]["message_rtime"]) total_tests += 1 if test_result == "OK": total_ok += 1 @@ -39,24 +39,29 @@ def process_result(result_folder): else: status = "success" - description = "failed: {}, passed: {}, other: {}".format(total_fail, total_ok, total_other) + description = "failed: {}, passed: {}, other: {}".format( + total_fail, total_ok, total_other + ) return status, description, test_results, [json_path, test_binary_log] def write_results(results_file, status_file, results, status): - with open(results_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(results_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(status_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerow(status) + if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = 
argparse.ArgumentParser(description="ClickHouse script for parsing results of Testflows tests") - parser.add_argument("--in-results-dir", default='./') - parser.add_argument("--out-results-file", default='./test_results.tsv') - parser.add_argument("--out-status-file", default='./check_status.tsv') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + parser = argparse.ArgumentParser( + description="ClickHouse script for parsing results of Testflows tests" + ) + parser.add_argument("--in-results-dir", default="./") + parser.add_argument("--out-results-file", default="./test_results.tsv") + parser.add_argument("--out-status-file", default="./check_status.tsv") args = parser.parse_args() state, description, test_results, logs = process_result(args.in_results_dir) @@ -64,4 +69,3 @@ if __name__ == "__main__": status = (state, description) write_results(args.out_results_file, args.out_status_file, test_results, status) logging.info("Result written") - diff --git a/docker/test/unit/process_unit_tests_result.py b/docker/test/unit/process_unit_tests_result.py index 7219aa13b82..0550edc7c25 100755 --- a/docker/test/unit/process_unit_tests_result.py +++ b/docker/test/unit/process_unit_tests_result.py @@ -5,24 +5,26 @@ import logging import argparse import csv -OK_SIGN = 'OK ]' -FAILED_SIGN = 'FAILED ]' -SEGFAULT = 'Segmentation fault' -SIGNAL = 'received signal SIG' -PASSED = 'PASSED' +OK_SIGN = "OK ]" +FAILED_SIGN = "FAILED ]" +SEGFAULT = "Segmentation fault" +SIGNAL = "received signal SIG" +PASSED = "PASSED" + def get_test_name(line): - elements = reversed(line.split(' ')) + elements = reversed(line.split(" ")) for element in elements: - if '(' not in element and ')' not in element: + if "(" not in element and ")" not in element: return element raise Exception("No test name in line '{}'".format(line)) + def process_result(result_folder): summary = [] total_counter = 0 failed_counter = 0 - result_log_path = '{}/test_result.txt'.format(result_folder) + result_log_path = "{}/test_result.txt".format(result_folder) if not os.path.exists(result_log_path): logging.info("No output log on path %s", result_log_path) return "exception", "No output log", [] @@ -30,7 +32,7 @@ def process_result(result_folder): status = "success" description = "" passed = False - with open(result_log_path, 'r') as test_result: + with open(result_log_path, "r") as test_result: for line in test_result: if OK_SIGN in line: logging.info("Found ok line: '%s'", line) @@ -38,7 +40,7 @@ def process_result(result_folder): logging.info("Test name: '%s'", test_name) summary.append((test_name, "OK")) total_counter += 1 - elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line: + elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line: logging.info("Found fail line: '%s'", line) test_name = get_test_name(line.strip()) logging.info("Test name: '%s'", test_name) @@ -67,25 +69,30 @@ def process_result(result_folder): status = "failure" if not description: - description += "fail: {}, passed: {}".format(failed_counter, total_counter - failed_counter) + description += "fail: {}, passed: {}".format( + failed_counter, total_counter - failed_counter + ) return status, description, summary def write_results(results_file, status_file, results, status): - with open(results_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(results_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, 'w') as f: - out = csv.writer(f, 
delimiter='\t') + with open(status_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerow(status) + if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of unit tests") - parser.add_argument("--in-results-dir", default='/test_output/') - parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') - parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + parser = argparse.ArgumentParser( + description="ClickHouse script for parsing results of unit tests" + ) + parser.add_argument("--in-results-dir", default="/test_output/") + parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") + parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") args = parser.parse_args() state, description, test_results = process_result(args.in_results_dir) @@ -93,4 +100,3 @@ if __name__ == "__main__": status = (state, description) write_results(args.out_results_file, args.out_status_file, test_results, status) logging.info("Result written") - diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index 82df170686d..dadda55c830 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -16,6 +16,7 @@ NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"] RETRIES_SIGN = "Some tests were restarted" + def process_test_log(log_path): total = 0 skipped = 0 @@ -26,7 +27,7 @@ def process_test_log(log_path): retries = False task_timeout = True test_results = [] - with open(log_path, 'r') as test_file: + with open(log_path, "r") as test_file: for line in test_file: original_line = line line = line.strip() @@ -36,12 +37,15 @@ def process_test_log(log_path): hung = True if RETRIES_SIGN in line: retries = True - if any(sign in line for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)): - test_name = line.split(' ')[2].split(':')[0] + if any( + sign in line + for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN) + ): + test_name = line.split(" ")[2].split(":")[0] - test_time = '' + test_time = "" try: - time_token = line.split(']')[1].strip().split()[0] + time_token = line.split("]")[1].strip().split()[0] float(time_token) test_time = time_token except: @@ -66,9 +70,22 @@ def process_test_log(log_path): elif len(test_results) > 0 and test_results[-1][1] == "FAIL": test_results[-1][3].append(original_line) - test_results = [(test[0], test[1], test[2], ''.join(test[3])) for test in test_results] + test_results = [ + (test[0], test[1], test[2], "".join(test[3])) for test in test_results + ] + + return ( + total, + skipped, + unknown, + failed, + success, + hung, + task_timeout, + retries, + test_results, + ) - return total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results def process_result(result_path): test_results = [] @@ -76,16 +93,26 @@ def process_result(result_path): description = "" files = os.listdir(result_path) if files: - logging.info("Find files in result folder %s", ','.join(files)) - result_path = os.path.join(result_path, 'test_result.txt') + logging.info("Find files in result folder %s", ",".join(files)) + result_path = os.path.join(result_path, "test_result.txt") else: result_path = None description = 
"No output log" state = "error" if result_path and os.path.exists(result_path): - total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results = process_test_log(result_path) - is_flacky_check = 1 < int(os.environ.get('NUM_TRIES', 1)) + ( + total, + skipped, + unknown, + failed, + success, + hung, + task_timeout, + retries, + test_results, + ) = process_test_log(result_path) + is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1)) logging.info("Is flacky check: %s", is_flacky_check) # If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately) # But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped. @@ -120,20 +147,22 @@ def process_result(result_path): def write_results(results_file, status_file, results, status): - with open(results_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(results_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(status_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerow(status) if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of functional tests") - parser.add_argument("--in-results-dir", default='/test_output/') - parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') - parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + parser = argparse.ArgumentParser( + description="ClickHouse script for parsing results of functional tests" + ) + parser.add_argument("--in-results-dir", default="/test_output/") + parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") + parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") args = parser.parse_args() state, description, test_results = process_result(args.in_results_dir) diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index 81887eb8b8e..f9dfebff3f9 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -71,6 +71,8 @@ This check means that the CI system started to process the pull request. When it Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally). If it fails, fix the style errors following the [code style guide](style.md). +Python code is checked with [black](https://github.com/psf/black/). + ### Report Details - [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html) - `output.txt` contains the check resulting errors (invalid tabulation etc), blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt). 
diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 9d1836b0ff2..db78637f104 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -229,6 +229,25 @@ As simple code editors, you can use Sublime Text or Visual Studio Code, or Kate Just in case, it is worth mentioning that CLion creates `build` path on its own, it also on its own selects `debug` for build type, for configuration it uses a version of CMake that is defined in CLion and not the one installed by you, and finally, CLion will use `make` to run build tasks instead of `ninja`. This is normal behaviour, just keep that in mind to avoid confusion. +## Debugging + +Many graphical IDEs offer an integrated debugger, but you can also use a standalone debugger. + +### GDB + +### LLDB + + # tell LLDB where to find the source code + settings set target.source-map /path/to/build/dir /path/to/source/dir + + # configure LLDB to display code before/after currently executing line + settings set stop-line-count-before 10 + settings set stop-line-count-after 10 + + target create ./clickhouse-client + # + process launch -- --query="SELECT * FROM TAB" + ## Writing Code {#writing-code} The description of ClickHouse architecture can be found here: https://clickhouse.com/docs/en/development/architecture/ diff --git a/docs/en/engines/table-engines/integrations/hive.md b/docs/en/engines/table-engines/integrations/hive.md index b804b9c2279..61147467690 100644 --- a/docs/en/engines/table-engines/integrations/hive.md +++ b/docs/en/engines/table-engines/integrations/hive.md @@ -137,7 +137,7 @@ CREATE TABLE test.test_orc `f_array_array_float` Array(Array(Float32)), `day` String ) -ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc') +ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc') PARTITION BY day ``` diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index 1d80f143098..3eb00bad33b 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -47,7 +47,7 @@ Optional parameters: - `kafka_row_delimiter` — Delimiter character, which ends the message. - `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. -- `kafka_num_consumers` — The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition. +- `kafka_num_consumers` — The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition, and must not be greater than the number of physical cores on the server where ClickHouse is deployed. - `kafka_max_block_size` — The maximum batch size (in messages) for poll (default: `max_block_size`). - `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`.
If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). - `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block (default: `0`). diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index a0acda5d5c6..b70cd225cdd 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -688,7 +688,7 @@ Tags: - `volume_name_N` — Volume name. Volume names must be unique. - `disk` — a disk within a volume. - `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the a size of a merged part estimated to be bigger than `max_data_part_size_bytes` then this part will be written to a next volume. Basically this feature allows to keep new/small parts on a hot (SSD) volume and move them to a cold (HDD) volume when they reach large size. Do not use this setting if your policy has only one volume. -- `move_factor` — when the amount of available space gets lower than this factor, data automatically start to move on the next volume if any (by default, 0.1). +- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved. - `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. Cofiguration examples: diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md index efc807b75fa..87096354f1f 100644 --- a/docs/en/getting-started/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -159,6 +159,10 @@ $ clickhouse-client --query "select count(*) from datasets.ontime" !!! info "Info" If you will run the queries described below, you have to use the full table name, `datasets.ontime`. + +!!! info "Info" + If you are using the prepared partitions or the Online Playground replace any occurrence of `IATA_CODE_Reporting_Airline` or `IATA_CODE_Reporting_Airline AS Carrier` in the following queries with `Carrier` (see `describe ontime`). + ## Queries {#queries} Q0. diff --git a/docs/en/getting-started/playground.md b/docs/en/getting-started/playground.md index 6c44f250242..01d7dd5b69f 100644 --- a/docs/en/getting-started/playground.md +++ b/docs/en/getting-started/playground.md @@ -5,30 +5,19 @@ toc_title: Playground # ClickHouse Playground {#clickhouse-playground} -!!! warning "Warning" - This service is deprecated and will be replaced in foreseeable future. - -[ClickHouse Playground](https://play.clickhouse.com) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. -Several example datasets are available in Playground as well as sample queries that show ClickHouse features. There’s also a selection of ClickHouse LTS releases to experiment with. 
+[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. +Several example datasets are available in Playground. You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). ## Credentials {#credentials} -| Parameter | Value | -|:--------------------|:----------------------------------------| -| HTTPS endpoint | `https://play-api.clickhouse.com:8443` | -| Native TCP endpoint | `play-api.clickhouse.com:9440` | -| User | `playground` | -| Password | `clickhouse` | - -There are additional endpoints with specific ClickHouse releases to experiment with their differences (ports and user/password are the same as above): - -- 20.3 LTS: `play-api-v20-3.clickhouse.com` -- 19.14 LTS: `play-api-v19-14.clickhouse.com` - -!!! note "Note" - All these endpoints require a secure TLS connection. +| Parameter | Value | +|:--------------------|:-----------------------------------| +| HTTPS endpoint | `https://play.clickhouse.com:443/` | +| Native TCP endpoint | `play.clickhouse.com:9440` | +| User | `explorer` or `play` | +| Password | (empty) | ## Limitations {#limitations} @@ -37,23 +26,18 @@ The queries are executed as a read-only user. It implies some limitations: - DDL queries are not allowed - INSERT queries are not allowed -The following settings are also enforced: - -- [max_result_bytes=10485760](../operations/settings/query-complexity/#max-result-bytes) -- [max_result_rows=2000](../operations/settings/query-complexity/#setting-max_result_rows) -- [result_overflow_mode=break](../operations/settings/query-complexity/#result-overflow-mode) -- [max_execution_time=60000](../operations/settings/query-complexity/#max-execution-time) +The service also has quotas on its usage. ## Examples {#examples} HTTPS endpoint example with `curl`: ``` bash -curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets" +curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'" ``` TCP endpoint example with [CLI](../interfaces/cli.md): ``` bash -clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" +clickhouse client --secure --host play.clickhouse.com --user explorer ``` diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index eaf7a96ce42..a252f55de2c 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -124,7 +124,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--time, -t` – If specified, print the query execution time to ‘stderr’ in non-interactive mode. - `--stacktrace` – If specified, also print the stack trace if an exception occurs. - `--config-file` – The name of the configuration file. -- `--secure` – If specified, will connect to server over secure connection. +- `--secure` – If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files).
The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl). - `--history_file` — Path to a file containing command history. - `--param_` — Value for a [query with parameters](#cli-queries-with-parameters). - `--hardware-utilization` — Print hardware utilization information in progress bar. @@ -148,7 +148,12 @@ Example of a config file: username password - False + true + + + /etc/ssl/cert.pem + + ``` diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 22c647bee13..a7066fca087 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -51,6 +51,7 @@ The supported formats are: | [PrettySpace](#prettyspace) | ✗ | ✔ | | [Protobuf](#protobuf) | ✔ | ✔ | | [ProtobufSingle](#protobufsingle) | ✔ | ✔ | +| [ProtobufList](#protobuflist) | ✔ | ✔ | | [Avro](#data-format-avro) | ✔ | ✔ | | [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | | [Parquet](#data-format-parquet) | ✔ | ✔ | @@ -401,7 +402,7 @@ Parsing allows the presence of the additional field `tskv` without the equal sig Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). -When formatting, rows are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). +When formatting, strings are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). ``` bash $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv @@ -409,7 +410,7 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR \*By default, the delimiter is `,`. See the [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter) setting for more information. -When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). 
In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported. +When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Strings can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing strings without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported. If setting [input_format_csv_empty_as_default](../operations/settings/settings.md#settings-input_format_csv_empty_as_default) is enabled, empty unquoted input values are replaced with default values. For complex default expressions [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#settings-input_format_defaults_for_omitted_fields) must be enabled too. @@ -1230,7 +1231,38 @@ See also [how to read/write length-delimited protobuf messages in popular langua ## ProtobufSingle {#protobufsingle} -Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters. +Same as [Protobuf](#protobuf) but for storing/parsing a single Protobuf message without length delimiter. +As a result, only a single table row can be written/read. + +## ProtobufList {#protobuflist} + +Similar to Protobuf but rows are represented as a sequence of sub-messages contained in a message with fixed name "Envelope". + +Usage example: + +``` sql +SELECT * FROM test.table FORMAT ProtobufList SETTINGS format_schema = 'schemafile:MessageType' +``` + +``` bash +cat protobuflist_messages.bin | clickhouse-client --query "INSERT INTO test.table FORMAT ProtobufList SETTINGS format_schema='schemafile:MessageType'" +``` + +where the file `schemafile.proto` looks like this: + +``` capnp +syntax = "proto3"; + +message Envelope { + message MessageType { + string name = 1; + string surname = 2; + uint32 birthDate = 3; + repeated string phoneNumbers = 4; + }; + MessageType row = 1; +}; +``` ## Avro {#data-format-avro} @@ -1364,7 +1396,8 @@ The table below shows supported data types and how they match ClickHouse [data t | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | — | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | @@ -1421,7 +1454,8 @@ The table below shows supported data types and how they match ClickHouse [data t | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT32` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | 
`UINT32` | +| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | @@ -1483,7 +1517,8 @@ The table below shows supported data types and how they match ClickHouse [data t | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | | `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index d72fb4d6f17..ca2a165bbd8 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -5,11 +5,10 @@ toc_title: HTTP Interface # HTTP Interface {#http-interface} -The HTTP interface lets you use ClickHouse on any platform from any programming language. We use it for working from Java and Perl, as well as shell scripts. In other departments, the HTTP interface is used from Perl, Python, and Go. The HTTP interface is more limited than the native interface, but it has better compatibility. +The HTTP interface lets you use ClickHouse on any platform from any programming language in a form of REST API. The HTTP interface is more limited than the native interface, but it has better language support. By default, `clickhouse-server` listens for HTTP on port 8123 (this can be changed in the config). - -Sometimes, `curl` command is not available on user operating systems. On Ubuntu or Debian, run `sudo apt install curl`. Please refer this [documentation](https://curl.se/download.html) to install it before running the examples. +HTTPS can be enabled as well with port 8443 by default. If you make a `GET /` request without parameters, it returns 200 response code and the string which defined in [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response) default value “Ok.” (with a line feed at the end) @@ -18,11 +17,12 @@ $ curl 'http://localhost:8123/' Ok. ``` +Sometimes, `curl` command is not available on user operating systems. On Ubuntu or Debian, run `sudo apt install curl`. Please refer this [documentation](https://curl.se/download.html) to install it before running the examples. + Web UI can be accessed here: `http://localhost:8123/play`. ![Web UI](../images/play.png) - In health-check scripts use `GET /ping` request. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13. See also `/replicas_status` to check replica's delay. ``` bash @@ -32,7 +32,7 @@ $ curl 'http://localhost:8123/replicas_status' Ok. ``` -Send the request as a URL ‘query’ parameter, or as a POST. Or send the beginning of the query in the ‘query’ parameter, and the rest in the POST (we’ll explain later why this is necessary). 
The size of the URL is limited to 16 KB, so keep this in mind when sending large queries. +Send the request as a URL ‘query’ parameter, or as a POST. Or send the beginning of the query in the ‘query’ parameter, and the rest in the POST (we’ll explain later why this is necessary). The size of the URL is limited to 1 MiB by default, this can be changed with the `http_max_uri_size` setting. If successful, you receive the 200 response code and the result in the response body. If an error occurs, you receive the 500 response code and an error description text in the response body. diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 9c7fab7424d..2c5c0b7cd52 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -28,6 +28,7 @@ toc_title: Adopters | Badoo | Dating | Timeseries | — | 1.6 mln events/sec (2018) | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/forecast.pdf) | | Beeline | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) | | Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | +| Better Stack | Cloud, SaaS | Log Management | - | - | [Official Website](https://betterstack.com/logtail) | | BIGO | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) | | BiliBili | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) | | Bloomberg | Finance, Media | Monitoring | — | — | [Job opening, September 2021](https://careers.bloomberg.com/job/detail/94913), [slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | @@ -43,7 +44,7 @@ toc_title: Adopters | Citymobil | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | | Cloudflare | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | | Comcast | Media | CDN Traffic Analysis | — | — | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg) | -| ContentSquare | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | +| Contentsquare | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | | Corunet | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | | CraiditX 氪信 | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | | Crazypanda | Games | | — | — | Live session on ClickHouse meetup | @@ -112,7 +113,7 @@ toc_title: Adopters | NLMK | Steel | Monitoring | — | — | [Article in Russian, Jan 2022](https://habr.com/en/company/nlmk/blog/645943/) | 
| NOC Project | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) | | Noction | Network Technology | Main Product | — | — | [Official Website](https://www.noction.com/news/irp-3-11-remote-triggered-blackholing-capability) -| ntop | Network Monitoning | Monitoring | — | — | [Official website, Jan 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) | +| ntop | Network Monitoning | Monitoring | — | — | [Official website, January 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) | | Nuna Inc. | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | | Ok.ru | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, October 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) | | Omnicomm | Transportation Monitoring | — | — | — | [Facebook post, October 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) | @@ -123,6 +124,7 @@ toc_title: Adopters | Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) | | Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | | Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) | +| PingCAP | Analytics | Real-Time Transactional and Analytical Processing | - | - | [GitHub, TiFlash/TiDB](https://github.com/pingcap/tiflash) | | Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | | PostHog | Product Analytics | Main Product | — | — | [Release Notes, October 2020](https://posthog.com/blog/the-posthog-array-1-15-0), [Blog, November 2021](https://posthog.com/blog/how-we-turned-clickhouse-into-our-eventmansion) | | Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) | @@ -158,6 +160,8 @@ toc_title: Adopters | Staffcop | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) | | Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) | | Superwall | Monetization Tooling | Main product | — | — | [Word of mouth, Jan 2022](https://github.com/ClickHouse/ClickHouse/pull/33573) | +| Swetrix | Analytics | Main Product | — | — | [Source code](https://github.com/swetrix/swetrix-api) | +| Synpse | Application Management | Main Product | - | - | [Tweet, January 2022](https://twitter.com/KRusenas/status/1483571168363880455) | | Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) | | Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | | Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) 
| @@ -171,6 +175,7 @@ toc_title: Adopters | UTMSTAT | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) | | Vercel | Traffic and Performance Analytics | — | — | — | Direct reference, October 2021 | | VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | +| VKontech | Distributed Systems | Migrating from MongoDB | - | - | [Blog, January 2022](https://vkontech.com/migrating-your-reporting-queries-from-a-general-purpose-db-mongodb-to-a-data-warehouse-clickhouse-performance-overview/) | | VMware | Cloud | VeloCloud, SDN | — | — | [Product documentation](https://docs.vmware.com/en/vRealize-Operations-Manager/8.3/com.vmware.vcom.metrics.doc/GUID-A9AD72E1-C948-4CA2-971B-919385AB3CA8.html) | | Walmart Labs | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) | | Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | @@ -195,5 +200,7 @@ toc_title: Adopters | ООО «МПЗ Богородский» | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) | | ДомКлик | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) | | АС "Стрела" | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) | +| Piwik PRO | Web Analytics | — | — | — | [Official website, Dec 2018](https://piwik.pro/blog/piwik-pro-clickhouse-faster-efficient-reports/) | +| Deepglint 格灵深瞳 | AI, Computer Vision | OLAP | — | — | [Official Website](https://www.deepglint.com/) | [Original article](https://clickhouse.com/docs/en/introduction/adopters/) diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md index 279204a8af1..9aa6419d89c 100644 --- a/docs/en/operations/caches.md +++ b/docs/en/operations/caches.md @@ -5,7 +5,7 @@ toc_title: Caches # Cache Types {#cache-types} -When performing queries, ClichHouse uses different caches. +When performing queries, ClickHouse uses different caches. Main cache types: diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 35ec5d858f5..26d61dabaf9 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -3,13 +3,10 @@ toc_priority: 66 toc_title: ClickHouse Keeper --- -# [pre-production] ClickHouse Keeper {#clickHouse-keeper} +# ClickHouse Keeper {#clickHouse-keeper} ClickHouse server uses [ZooKeeper](https://zookeeper.apache.org/) coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is an alternative coordination system compatible with ZooKeeper. -!!! warning "Warning" - This feature is currently in the pre-production stage. We test it in our CI and on small internal installations. - ## Implementation details {#implementation-details} ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java, has quite a simple and powerful data model. ZooKeeper's coordination algorithm called ZAB (ZooKeeper Atomic Broadcast) doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. 
Unlike ZooKeeper ClickHouse Keeper is written in C++ and uses [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm allows to have linearizability for reads and writes, has several open-source implementations in different languages. @@ -55,7 +52,7 @@ Internal coordination settings are located in `..` section and contain servers description. @@ -121,7 +118,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively. -The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro". +The 4lw commands have an allow list configuration `four_letter_word_allow_list` which has default value "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro". You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port. @@ -201,7 +198,7 @@ Server stats reset. ``` server_id=1 tcp_port=2181 -four_letter_word_white_list=* +four_letter_word_allow_list=* log_storage_path=./coordination/logs snapshot_storage_path=./coordination/snapshots max_requests_batch_size=100 diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index dce7938f98b..ab972c72345 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -36,6 +36,7 @@ Example of configuration: AKIAIOSFODNN7EXAMPLE wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY CSV + https://s3.us-east-1.amazonaws.com/yourbucket/mydata/ @@ -44,12 +45,12 @@ Example of configuration: ### Example of using named connections with the s3 function ```sql -INSERT INTO FUNCTION s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz', +INSERT INTO FUNCTION s3(s3_mydata, filename = 'test_file.tsv.gz', format = 'TSV', structure = 'number UInt64', compression_method = 'gzip') SELECT * FROM numbers(10000); SELECT count() -FROM s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz') +FROM s3(s3_mydata, filename = 'test_file.tsv.gz') ┌─count()─┐ │ 10000 │ diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 5652a294e78..3e1f8bbcca0 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -366,12 +366,12 @@ Opens `https://tabix.io/` when accessing `http://localhost: http_port`.
]]>
-``` +``` ## hsts_max_age {#hsts-max-age} - -Expired time for HSTS in seconds. The default value is 0 means clickhouse disabled HSTS. If you set a positive number, the HSTS will be enabled and the max-age is the number you set. - -**Example** + +Expired time for HSTS in seconds. The default value is 0 means clickhouse disabled HSTS. If you set a positive number, the HSTS will be enabled and the max-age is the number you set. + +**Example** ```xml 600000 @@ -480,7 +480,7 @@ To enable authentication, set `interserver_http_credentials.allow_empty` to `tru After configuring all replicas set `allow_empty` to `false` or remove this setting. It makes authentication with new credentials mandatory. -To change existing credentials, move the username and the password to `interserver_http_credentials.old` section and update `user` and `password` with new values. At this point the server uses new credentials to connect to other replicas and accepts connections with either new or old credentials. +To change existing credentials, move the username and the password to `interserver_http_credentials.old` section and update `user` and `password` with new values. At this point the server uses new credentials to connect to other replicas and accepts connections with either new or old credentials. ``` xml @@ -846,7 +846,7 @@ The value 0 means that you can delete all tables without any restrictions. ClickHouse uses threads from the Global Thread pool to process queries. If there is no idle thread to process a query, then a new thread is created in the pool. `max_thread_pool_size` limits the maximum number of threads in the pool. -Possible values: +Possible values: - Positive integer. @@ -862,7 +862,7 @@ Default value: `10000`. If the number of **idle** threads in the Global Thread pool is greater than `max_thread_pool_free_size`, then ClickHouse releases resources occupied by some threads and the pool size is decreased. Threads can be created again if necessary. -Possible values: +Possible values: - Positive integer. @@ -878,7 +878,7 @@ Default value: `1000`. The maximum number of jobs that can be scheduled on the Global Thread pool. Increasing queue size leads to larger memory usage. It is recommended to keep this value equal to [max_thread_pool_size](#max-thread-pool-size). -Possible values: +Possible values: - Positive integer. @@ -953,30 +953,30 @@ For more information, see the MergeTreeSettings.h header file. SSL client/server configuration. -Support for SSL is provided by the `libpoco` library. The interface is described in the file [SSLManager.h](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h) +Support for SSL is provided by the `libpoco` library. The available configuration options are explained in [SSLManager.h](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h). Default values can be found in [SSLManager.cpp](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/src/SSLManager.cpp). Keys for server/client settings: - privateKeyFile – The path to the file with the secret key of the PEM certificate. The file may contain a key and certificate at the same time. - certificateFile – The path to the client/server certificate file in PEM format. You can omit it if `privateKeyFile` contains the certificate. -- caConfig – The path to the file or directory that contains trusted root certificates. -- verificationMode – The method for checking the node’s certificates. 
Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`. -- verificationDepth – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. -- loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `true`, `false`. \| -- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. -- cacheSessions – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`. -- sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`. -- sessionCacheSize – The maximum number of sessions that the server caches. Default value: 1024\*20. 0 – Unlimited sessions. -- sessionTimeout – Time for caching the session on the server. -- extendedVerification – Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`. -- requireTLSv1 – Require a TLSv1 connection. Acceptable values: `true`, `false`. -- requireTLSv1_1 – Require a TLSv1.1 connection. Acceptable values: `true`, `false`. -- requireTLSv1_2 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. -- fips – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS. -- privateKeyPassphraseHandler – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: ``, `KeyFileHandler`, `test`, ``. -- invalidCertificateHandler – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: ` ConsoleCertificateHandler ` . -- disableProtocols – Protocols that are not allowed to use. -- preferServerCiphers – Preferred server ciphers on the client. +- caConfig (default: none) – The path to the file or directory that contains trusted CA certificates. If this points to a file, it must be in PEM format and can contain several CA certificates. If this points to a directory, it must contain one .pem file per CA certificate. The filenames are looked up by the CA subject name hash value. Details can be found in the man page of [SSL_CTX_load_verify_locations](https://www.openssl.org/docs/man3.0/man3/SSL_CTX_load_verify_locations.html). +- verificationMode (default: relaxed) – The method for checking the node’s certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`. +- verificationDepth (default: 9) – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. +- loadDefaultCAFile (default: true) – Whether built-in CA certificates for OpenSSL will be used. ClickHouse assumes that builtin CA certificates are in the file `/etc/ssl/cert.pem` (resp. the directory `/etc/ssl/certs`) or in file (resp. directory) specified by the environment variable `SSL_CERT_FILE` (resp. `SSL_CERT_DIR`).
+- cipherList (default: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`) - Supported OpenSSL encryptions. +- cacheSessions (default: false) – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`. +- sessionIdContext (default: `${application.name}`) – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`. +- sessionCacheSize (default: [1024\*20](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1978)) – The maximum number of sessions that the server caches. A value of 0 means unlimited sessions. +- sessionTimeout (default: [2h](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1926)) – Time for caching the session on the server. +- extendedVerification (default: false) – If enabled, verify that the certificate CN or SAN matches the peer hostname. +- requireTLSv1 (default: false) – Require a TLSv1 connection. Acceptable values: `true`, `false`. +- requireTLSv1_1 (default: false) – Require a TLSv1.1 connection. Acceptable values: `true`, `false`. +- requireTLSv1_2 (default: false) – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. +- fips (default: false) – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS. +- privateKeyPassphraseHandler (default: `KeyConsoleHandler`)– Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: ``, `KeyFileHandler`, `test`, ``. +- invalidCertificateHandler (default: `ConsoleCertificateHandler`) – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: ` ConsoleCertificateHandler ` . +- disableProtocols (default: "") – Protocols that are not allowed to use. +- preferServerCiphers (default: false) – Preferred server ciphers on the client. **Example of settings:** @@ -1479,6 +1479,18 @@ The update is performed asynchronously, in a separate system thread. - [background_schedule_pool_size](../../operations/settings/settings.md#background_schedule_pool_size) + +## dns_max_consecutive_failures {#server-settings-dns-max-consecutive-failures} + +The number of consecutive failures accepted when updating a DNS cache entry before it is dropped. +Use `0` to disable cache dropping (entries will only be cleaned by `SYSTEM DROP DNS CACHE`) + +**Default value**: 5. + +**See also** + +- [`SYSTEM DROP DNS CACHE`](../../sql-reference/statements/system.md#query_language-system-drop-dns-cache) + ## distributed_ddl {#server-settings-distributed_ddl} Manage executing [distributed ddl queries](../../sql-reference/distributed-ddl.md) (CREATE, DROP, ALTER, RENAME) on cluster. @@ -1628,3 +1640,14 @@ Possible values: Default value: `10000`. +## global_memory_usage_overcommit_max_wait_microseconds {#global_memory_usage_overcommit_max_wait_microseconds} + +Sets maximum waiting time for global overcommit tracker. + +Possible values: + +- Positive integer. + +Default value: `0`. 
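As a rough illustration (this snippet is not part of the patch), the server-level parameter above would be set in the main server configuration file; the root tag and the value shown here are assumptions chosen for the example:

```xml
<clickhouse>
    <!-- Illustrative value: let the global overcommit tracker wait up to 200 microseconds for memory to be freed. -->
    <global_memory_usage_overcommit_max_wait_microseconds>200</global_memory_usage_overcommit_max_wait_microseconds>
</clickhouse>
```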
+ + diff --git a/docs/en/operations/settings/memory-overcommit.md b/docs/en/operations/settings/memory-overcommit.md new file mode 100644 index 00000000000..3f99382b826 --- /dev/null +++ b/docs/en/operations/settings/memory-overcommit.md @@ -0,0 +1,31 @@ +# Memory overcommit + +Memory overcommit is an experimental technique intended to allow setting more flexible memory limits for queries. + +The idea of this technique is to introduce settings which can represent guaranteed amount of memory a query can use. +When memory overcommit is enabled and the memory limit is reached ClickHouse will select the most overcommitted query and try to free memory by killing this query. + +When the memory limit is reached, any query will wait some time during an attempt to allocate new memory. +If the timeout is passed and memory is freed, the query continues execution. Otherwise an exception will be thrown and the query is killed. + +Selection of query to stop or kill is performed by either global or user overcommit trackers depending on what memory limit is reached. + +## User overcommit tracker + +User overcommit tracker finds a query with the biggest overcommit ratio in the user's query list. +Overcommit ratio is computed as number of allocated bytes divided by value of `max_guaranteed_memory_usage` setting. + +Waiting timeout is set by `memory_usage_overcommit_max_wait_microseconds` setting. + +**Example** + +```sql +SELECT number FROM numbers(1000) GROUP BY number SETTINGS max_guaranteed_memory_usage=4000, memory_usage_overcommit_max_wait_microseconds=500 +``` + +## Global overcommit tracker + +Global overcommit tracker finds a query with the biggest overcommit ratio in the list of all queries. +In this case overcommit ratio is computed as number of allocated bytes divided by value of `max_guaranteed_memory_usage_for_user` setting. + +Waiting timeout is set by `global_memory_usage_overcommit_max_wait_microseconds` parameter in the configuration file. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c04ca5822e6..07abd77fed0 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -519,6 +519,33 @@ Possible values: Default value: `1`. +## allow_settings_after_format_in_insert {#allow_settings_after_format_in_insert} + +Control whether `SETTINGS` after `FORMAT` in `INSERT` queries is allowed or not. It is not recommended to use this, since this may interpret part of `SETTINGS` as values. + +Example: + +```sql +INSERT INTO FUNCTION null('foo String') SETTINGS max_threads=1 VALUES ('bar'); +``` + +But the following query will work only with `allow_settings_after_format_in_insert`: + +```sql +SET allow_settings_after_format_in_insert=1; +INSERT INTO FUNCTION null('foo String') VALUES ('bar') SETTINGS max_threads=1; +``` + +Possible values: + +- 0 — Disallow. - 1 — Allow. + +Default value: `0`. + +!!! note "Warning" + Use this setting only for backward compatibility if your use cases depend on old syntax. + ## input_format_skip_unknown_fields {#settings-input-format-skip-unknown-fields} Enables or disables skipping insertion of extra data. @@ -1062,6 +1089,15 @@ Result: └─────────────┴───────────┘ ``` +## log_processors_profiles {#settings-log_processors_profiles} + +Write the time that the processor spent during execution/waiting for data to the `system.processors_profile_log` table.
+ +See also: + +- [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md#system-processors_profile_log) +- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) + ## max_insert_block_size {#settings-max_insert_block_size} The size of blocks (in a count of rows) to form for insertion into a table. @@ -3290,6 +3326,19 @@ Possible values: Default value: `16`. +## max_insert_delayed_streams_for_parallel_write {#max-insert-delayed-streams-for-parallel-write} + +The maximum number of streams (columns) to delay final part flush. + +It makes difference only if underlying storage supports parallel write (i.e. S3), otherwise it will not give any benefit. + +Possible values: + +- Positive integer. +- 0 or 1 — Disabled. + +Default value: `1000` for S3 and `0` otherwise. + ## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability} Sets the probability that the ClickHouse can start a trace for executed queries (if no parent [trace context](https://www.w3.org/TR/trace-context/) is supplied). @@ -4207,10 +4256,36 @@ Possible values: - 0 — Disabled. - 1 — Enabled. The wait time equal shutdown_wait_unfinished config. -Default value: 0. +Default value: `0`. ## shutdown_wait_unfinished The waiting time in seconds for currently handled connections when shutdown server. -Default Value: 5. +Default Value: `5`. + +## max_guaranteed_memory_usage + +Maximum guaranteed memory usage for processing of single query. +It represents soft limit in case when hard limit is reached on user level. +Zero means unlimited. +Read more about [memory overcommit](memory-overcommit.md). + +Default value: `0`. + +## memory_usage_overcommit_max_wait_microseconds + +Maximum time thread will wait for memory to be freed in the case of memory overcommit on a user level. +If the timeout is reached and memory is not freed, an exception is thrown. +Read more about [memory overcommit](memory-overcommit.md). + +Default value: `0`. + +## max_guaranteed_memory_usage_for_user + +Maximum guaranteed memory usage for processing all concurrently running queries for the user. +It represents soft limit in case when hard limit is reached on global level. +Zero means unlimited. +Read more about [memory overcommit](memory-overcommit.md). + +Default value: `0`. diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index beffd45bcbd..6cda47ab9fb 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -11,10 +11,6 @@ To work with data stored on `Amazon S3` disks use [S3](../engines/table-engines/ To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver). -## Zero-copy Replication {#zero-copy} - -ClickHouse supports zero-copy replication for `S3` and `HDFS` disks, which means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. - ## Configuring HDFS {#configuring-hdfs} [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) and [Log](../engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`. @@ -316,3 +312,8 @@ When loading files by `endpoint`, they must be loaded into `/store/` p If URL is not reachable on disk load when the server is starting up tables, then all errors are caught. 
If in this case there were errors, tables can be reloaded (become visible) via `DETACH TABLE table_name` -> `ATTACH TABLE table_name`. If metadata was successfully loaded at server startup, then tables are available straight away. Use [http_max_single_read_retries](../operations/settings/settings.md#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read. + + +## Zero-copy Replication (not ready for production) {#zero-copy} + +ClickHouse supports zero-copy replication for `S3` and `HDFS` disks, which means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. diff --git a/docs/en/operations/system-tables/processors_profile_log.md b/docs/en/operations/system-tables/processors_profile_log.md new file mode 100644 index 00000000000..2d76edb5dd7 --- /dev/null +++ b/docs/en/operations/system-tables/processors_profile_log.md @@ -0,0 +1,75 @@ +# system.processors_profile_log {#system-processors_profile_log} + +This table contains profiling on processors level (that you can find in [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)). + +Columns: + +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened. +- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened. +- `id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of processor +- `parent_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Parent processors IDs +- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query +- `name` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Name of the processor. +- `elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was executed. +- `input_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting for data (from other processor). +- `output_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting because output port was full. + +**Example** + +Query: + +``` sql +EXPLAIN PIPELINE +SELECT sleep(1) + +┌─explain─────────────────────────┐ +│ (Expression) │ +│ ExpressionTransform │ +│ (SettingQuotaAndLimits) │ +│ (ReadFromStorage) │ +│ SourceFromSingleChunk 0 → 1 │ +└─────────────────────────────────┘ + +SELECT sleep(1) +SETTINGS log_processors_profiles = 1 + +Query id: feb5ed16-1c24-4227-aa54-78c02b3b27d4 + +┌─sleep(1)─┐ +│ 0 │ +└──────────┘ + +1 rows in set. Elapsed: 1.018 sec. 
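+
+-- The SELECT below reads the per-processor timings recorded for the query above;
+-- its query_id is taken from the session output shown here, and it assumes the
+-- profiling entries have already been flushed to the system table.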
+ + SELECT + name, + elapsed_us, + input_wait_elapsed_us, + output_wait_elapsed_us +FROM system.processors_profile_log +WHERE query_id = 'feb5ed16-1c24-4227-aa54-78c02b3b27d4' +ORDER BY name ASC +``` + +Result: + +``` text +┌─name────────────────────┬─elapsed_us─┬─input_wait_elapsed_us─┬─output_wait_elapsed_us─┐ +│ ExpressionTransform │ 1000497 │ 2823 │ 197 │ +│ LazyOutputFormat │ 36 │ 1002188 │ 0 │ +│ LimitsCheckingTransform │ 10 │ 1002994 │ 106 │ +│ NullSource │ 5 │ 1002074 │ 0 │ +│ NullSource │ 1 │ 1002084 │ 0 │ +│ SourceFromSingleChunk │ 45 │ 4736 │ 1000819 │ +└─────────────────────────┴────────────┴───────────────────────┴────────────────────────┘ +``` + +Here you can see: + +- `ExpressionTransform` was executing the `sleep(1)` function, so its `work` takes about 1e6 microseconds, and so `elapsed_us` > 1e6. +- `SourceFromSingleChunk` needs to wait, because `ExpressionTransform` does not accept any data during the execution of `sleep(1)`, so it stays in the `PortFull` state for about 1e6 us, and so `output_wait_elapsed_us` > 1e6. +- `LimitsCheckingTransform`/`NullSource`/`LazyOutputFormat` need to wait until `ExpressionTransform` has executed `sleep(1)` before they can process the result, so `input_wait_elapsed_us` > 1e6. + +**See Also** + +- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md new file mode 100644 index 00000000000..0237885bcb6 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md @@ -0,0 +1,48 @@ +--- +toc_priority: 108 +--- + +# groupArraySorted {#groupArraySorted} + +Returns an array with the first N items in ascending order. + +``` sql +groupArraySorted(N)(column) +``` + +**Parameters** + +- `N` – The number of elements to return. + +If the parameter is omitted, the default value of 10 is used. + +**Arguments** + +- `column` – The value. +- `expr` — Optional. The field or expression to sort by. If not set, values are sorted by themselves.
+ +**Example** + +Gets the first 10 numbers: + +``` sql +SELECT groupArraySorted(10)(number) FROM numbers(100) +``` + +``` text +┌─groupArraySorted(10)(number)─┐ +│ [0,1,2,3,4,5,6,7,8,9] │ +└──────────────────────────────┘ +``` + +Or the last 10: + +``` sql +SELECT groupArraySorted(10)(number, -number) FROM numbers(100) +``` + +``` text +┌─groupArraySorted(10)(number, negate(number))─┐ +│ [99,98,97,96,95,94,93,92,91,90] │ +└──────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index 59befed8785..2a8a2843510 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -35,6 +35,7 @@ ClickHouse-specific aggregate functions: - [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md) - [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md) - [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md) +- [groupArraySorted](../../../sql-reference/aggregate-functions/reference/grouparraysorted.md) - [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md) - [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md) - [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index effcc614930..abef03a3914 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -225,15 +225,15 @@ This storage method works the same way as hashed and allows using date/time (arb Example: The table contains discounts for each advertiser in the format: ``` text -+---------|-------------|-------------|------+ ++---------------|---------------------|-------------------|--------+ | advertiser id | discount start date | discount end date | amount | +===============+=====================+===================+========+ | 123 | 2015-01-01 | 2015-01-15 | 0.15 | -+---------|-------------|-------------|------+ ++---------------|---------------------|-------------------|--------+ | 123 | 2015-01-16 | 2015-01-31 | 0.25 | -+---------|-------------|-------------|------+ ++---------------|---------------------|-------------------|--------+ | 456 | 2015-01-01 | 2015-01-15 | 0.05 | -+---------|-------------|-------------|------+ ++---------------|---------------------|-------------------|--------+ ``` To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain elements `name` and `type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others). 
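To make the layout concrete, here is a sketch of the corresponding dictionary DDL for the discounts example above (the names `discounts_dict` and `discounts` are illustrative, and the source and lifetime clauses should be adapted to your setup):

``` sql
CREATE DICTIONARY discounts_dict
(
    advertiser_id UInt64,
    discount_start_date Date,
    discount_end_date Date,
    amount Float64
)
PRIMARY KEY advertiser_id
SOURCE(CLICKHOUSE(TABLE 'discounts'))  -- local table holding the rows shown above
LIFETIME(MIN 600 MAX 900)              -- refresh the dictionary every 10-15 minutes
LAYOUT(RANGE_HASHED())
RANGE(MIN discount_start_date MAX discount_end_date);
```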
@@ -272,10 +272,10 @@ LAYOUT(RANGE_HASHED()) RANGE(MIN first MAX last) ``` -To work with these dictionaries, you need to pass an additional argument to the `dictGetT` function, for which a range is selected: +To work with these dictionaries, you need to pass an additional argument to the `dictGet*` function, for which a range is selected: ``` sql -dictGetT('dict_name', 'attr_name', id, date) +dictGet*('dict_name', 'attr_name', id, date) ``` This function returns the value for the specified `id`s and the date range that includes the passed date. @@ -479,17 +479,17 @@ This type of storage is for mapping network prefixes (IP addresses) to metadata Example: The table contains network prefixes and their corresponding AS number and country code: ``` text - +-----------|-----|------+ + +-----------------|-------|--------+ | prefix | asn | cca2 | +=================+=======+========+ | 202.79.32.0/20 | 17501 | NP | - +-----------|-----|------+ + +-----------------|-------|--------+ | 2620:0:870::/48 | 3856 | US | - +-----------|-----|------+ + +-----------------|-------|--------+ | 2a02:6b8:1::/48 | 13238 | RU | - +-----------|-----|------+ + +-----------------|-------|--------+ | 2001:db8::/32 | 65536 | ZZ | - +-----------|-----|------+ + +-----------------|-------|--------+ ``` When using this type of layout, the structure must have a composite key. @@ -538,10 +538,10 @@ PRIMARY KEY prefix The key must have only one String type attribute that contains an allowed IP prefix. Other types are not supported yet. -For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys: +For queries, you must use the same functions (`dictGet*` with a tuple) as for dictionaries with composite keys: ``` sql -dictGetT('dict_name', 'attr_name', tuple(ip)) +dictGet*('dict_name', 'attr_name', tuple(ip)) ``` The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6: diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 8231cda4b77..eebc489fffa 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1392,12 +1392,24 @@ Returns the first element in the `arr1` array for which `func` returns something Note that the `arrayFirst` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. +## arrayFirstOrNull(func, arr1, …) {#array-first-or-null} + +Returns the first element in the `arr1` array for which `func` returns something other than 0. If there are no such element, returns null. + +Note that the `arrayFirstOrNull` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. + ## arrayLast(func, arr1, …) {#array-last} Returns the last element in the `arr1` array for which `func` returns something other than 0. Note that the `arrayLast` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. +## arrayLastOrNull(func, arr1, …) {#array-last-or-null} + +Returns the last element in the `arr1` array for which `func` returns something other than 0. If there are no such element, returns null. 
+ +Note that the `arrayLast` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. + ## arrayFirstIndex(func, arr1, …) {#array-first-index} Returns the index of the first element in the `arr1` array for which `func` returns something other than 0. diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index d535a516b3a..fc48c97bb61 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -393,6 +393,13 @@ This is a generalization of other functions named `toStartOf*`. For example, `toStartOfInterval(t, INTERVAL 1 day)` returns the same as `toStartOfDay(t)`, `toStartOfInterval(t, INTERVAL 15 minute)` returns the same as `toStartOfFifteenMinutes(t)` etc. +## toLastDayOfMonth {#toLastDayOfMonth} + +Rounds up a date or date with time to the last day of the month. +Returns the date. + +Alias: `LAST_DAY`. + ## toTime {#totime} Converts a date with time to a certain fixed date, while preserving the time. diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index ecbe00adfd7..6d3f510bffc 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -1026,4 +1026,185 @@ Result: │ 41162 │ └─────────────┘ ``` + +## h3PointDistM {#h3pointdistm} + +Returns the "great circle" or "haversine" distance between pairs of GeoCoord points (latitude/longitude) pairs in meters. + +**Syntax** + +``` sql +h3PointDistM(lat1, lon1, lat2, lon2) +``` + +**Arguments** + +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). + +**Returned values** + +- Haversine or great circle distance in meters. + +Type: [Float64](../../../sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +select h3PointDistM(-10.0 ,0.0, 10.0, 0.0) as h3PointDistM; +``` + +Result: + +``` text +┌──────h3PointDistM─┐ +│ 2223901.039504589 │ +└───────────────────┘ +``` + +## h3PointDistKm {#h3pointdistkm} + +Returns the "great circle" or "haversine" distance between pairs of GeoCoord points (latitude/longitude) pairs in kilometers. + +**Syntax** + +``` sql +h3PointDistKm(lat1, lon1, lat2, lon2) +``` + +**Arguments** + +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). + +**Returned values** + +- Haversine or great circle distance in kilometers. + +Type: [Float64](../../../sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +select h3PointDistKm(-10.0 ,0.0, 10.0, 0.0) as h3PointDistKm; +``` + +Result: + +``` text +┌─────h3PointDistKm─┐ +│ 2223.901039504589 │ +└───────────────────┘ +``` + +## h3PointDistRads {#h3pointdistrads} + +Returns the "great circle" or "haversine" distance between pairs of GeoCoord points (latitude/longitude) pairs in radians. + +**Syntax** + +``` sql +h3PointDistRads(lat1, lon1, lat2, lon2) +``` + +**Arguments** + +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). 
+- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). + +**Returned values** + +- Haversine or great circle distance in radians. + +Type: [Float64](../../../sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +select h3PointDistRads(-10.0 ,0.0, 10.0, 0.0) as h3PointDistRads; +``` + +Result: + +``` text +┌────h3PointDistRads─┐ +│ 0.3490658503988659 │ +└────────────────────┘ +``` + +## h3GetRes0Indexes {#h3getres0indexes} + +Returns an array of all the resolution 0 H3 indexes. + +**Syntax** + +``` sql +h3GetRes0Indexes() +``` + +**Returned values** + +- Array of all the resolution 0 H3 indexes. + +Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). + + +**Example** + +Query: + +``` sql +select h3GetRes0Indexes as indexes ; +``` + +Result: + +``` text +┌─indexes─────────────────────────────────────┐ +│ [576495936675512319,576531121047601151,....]│ +└─────────────────────────────────────────────┘ +``` + +## h3GetPentagonIndexes {#h3getpentagonindexes} + +Returns all the pentagon H3 indexes at the specified resolution. + +**Syntax** + +``` sql +h3GetPentagonIndexes(resolution) +``` + +**Parameter** + +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Array of all pentagon H3 indexes. + +Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). + +**Example** + +Query: + +``` sql +SELECT h3GetPentagonIndexes(3) AS indexes; +``` + +Result: + +``` text +┌─indexes────────────────────────────────────────────────────────┐ +│ [590112357393367039,590464201114255359,590816044835143679,...] │ +└────────────────────────────────────────────────────────────────┘ +``` + [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index 7cceec889bd..572aa7f632e 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -77,7 +77,7 @@ A function configuration contains the following settings: - `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number. - `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command. - `return_type` - the type of a returned value. -- `return_name` - name of retuned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`. +- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`. - `type` - an executable type. If `type` is set to `executable` then single command is started. 
If it is set to `executable_pool` then a pool of commands is created. - `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`. - `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`. diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index 469a66d460f..cf3f92580aa 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -13,10 +13,18 @@ Alias: `INET_NTOA`. ## IPv4StringToNum(s) {#ipv4stringtonums} -The reverse function of IPv4NumToString. If the IPv4 address has an invalid format, it returns 0. +The reverse function of IPv4NumToString. If the IPv4 address has an invalid format, it throws exception. Alias: `INET_ATON`. +## IPv4StringToNumOrDefault(s) {#ipv4stringtonums} + +Same as `IPv4StringToNum`, but if the IPv4 address has an invalid format, it returns 0. + +## IPv4StringToNumOrNull(s) {#ipv4stringtonums} + +Same as `IPv4StringToNum`, but if the IPv4 address has an invalid format, it returns null. + ## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum} Similar to IPv4NumToString, but using xxx instead of the last octet. @@ -123,7 +131,7 @@ LIMIT 10 ## IPv6StringToNum {#ipv6stringtonums} -The reverse function of [IPv6NumToString](#ipv6numtostringx). If the IPv6 address has an invalid format, it returns a string of null bytes. +The reverse function of [IPv6NumToString](#ipv6numtostringx). If the IPv6 address has an invalid format, it throws exception. If the input string contains a valid IPv4 address, returns its IPv6 equivalent. HEX can be uppercase or lowercase. @@ -168,6 +176,14 @@ Result: - [cutIPv6](#cutipv6x-bytestocutforipv6-bytestocutforipv4). +## IPv6StringToNumOrDefault(s) {#ipv6stringtonums} + +Same as `IPv6StringToNum`, but if the IPv6 address has an invalid format, it returns 0. + +## IPv6StringToNumOrNull(s) {#ipv6stringtonums} + +Same as `IPv6StringToNum`, but if the IPv6 address has an invalid format, it returns null. + ## IPv4ToIPv6(x) {#ipv4toipv6x} Takes a `UInt32` number. Interprets it as an IPv4 address in [big endian](https://en.wikipedia.org/wiki/Endianness). Returns a `FixedString(16)` value containing the IPv6 address in binary format. Examples: @@ -261,6 +277,14 @@ SELECT └───────────────────────────────────┴──────────────────────────┘ ``` +## toIPv4OrDefault(string) {#toipv4ordefaultstring} + +Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns 0. + +## toIPv4OrNull(string) {#toipv4ornullstring} + +Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns null. + ## toIPv6 {#toipv6string} Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/domains/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. @@ -317,6 +341,14 @@ Result: └─────────────────────┘ ``` +## IPv6StringToNumOrDefault(s) {#toipv6ordefaultstring} + +Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns 0. + +## IPv6StringToNumOrNull(s) {#toipv6ornullstring} + +Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null. + ## isIPv4String {#isipv4string} Determines whether the input string is an IPv4 address or not. 
If `string` is IPv6 address returns `0`. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index bce3f9144b1..cedde8a7f35 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2499,3 +2499,41 @@ Result: │ 286 │ └──────────────────────────┘ ``` + +## getTypeSerializationStreams {#getTypeSerializationStreams} + +return the serialization streams of data type. + +**Syntax** +``` sql +getTypeSerializationStreams(type_name) + +getTypeSerializationStreams(column) +``` + +**Arguments** +- `type_name` - Name of data type to get its serialization paths. [String](../../sql-reference/data-types/string.md#string). +- `column` - any column which has a data type + +**Returned value** +- List of serialization streams; + +Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). + + + +**Example** + +Query: + +``` sql +SELECT getTypeSerializationStreams('Array(Array(Int8))') +``` + +Result: + +``` text +┌───────────────────────getTypeSerializationStreams('Array(Array(Int8))')─────────────────────────────┐ +│ ['{ArraySizes}','{ArrayElements, ArraySizes}','{ArrayElements, ArrayElements, Regular}'] │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 409ec422ade..ee663c92695 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -114,9 +114,9 @@ In addition, this column is not substituted when using an asterisk in a SELECT q ### EPHEMERAL {#ephemeral} -`EPHEMERAL expr` +`EPHEMERAL [expr]` -Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. +Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted type for column is required. INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns. ### ALIAS {#alias} diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 33644133153..c5421c83091 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -22,7 +22,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list [WHERE expr] [GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS] [HAVING expr] -[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr] +[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr] [INTERPOLATE [(expr_list)]] [LIMIT [offset_value, ]n BY columns] [LIMIT [n, ]m] [WITH TIES] [SETTINGS ...] diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index b24f0213e4e..04630ba1075 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -280,6 +280,7 @@ To fill multiple columns, add `WITH FILL` modifier with optional parameters afte ``` sql ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... 
exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr] +[INTERPOLATE [(col [AS expr], ... colN [AS exprN])]] ``` `WITH FILL` can be applied for fields with Numeric (all kinds of float, decimal, int) or Date/DateTime types. When applied for `String` fields, missed values are filled with empty strings. @@ -287,6 +288,7 @@ When `FROM const_expr` not defined sequence of filling use minimal `expr` field When `TO const_expr` not defined sequence of filling use maximum `expr` field value from `ORDER BY`. When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types, as `days` for Date type, as `seconds` for DateTime type. It also supports [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals. When `STEP const_numeric_expr` omitted then sequence of filling use `1.0` for numeric type, `1 day` for Date type and `1 second` for DateTime type. +`INTERPOLATE` can be applied to columns not participating in `ORDER BY WITH FILL`. Such columns are filled based on previous fields values by applying `expr`. If `expr` is not present will repeate previous value. Omitted list will result in including all allowed columns. Example of a query without `WITH FILL`: @@ -483,4 +485,62 @@ Result: └────────────┴────────────┴──────────┘ ``` +Example of a query without `INTERPOLATE`: + +``` sql +SELECT n, source, inter FROM ( + SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter + FROM numbers(10) WHERE number % 3 = 1 +) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5; +``` + +Result: + +``` text +┌───n─┬─source───┬─inter─┐ +│ 0 │ │ 0 │ +│ 0.5 │ │ 0 │ +│ 1 │ original │ 1 │ +│ 1.5 │ │ 0 │ +│ 2 │ │ 0 │ +│ 2.5 │ │ 0 │ +│ 3 │ │ 0 │ +│ 3.5 │ │ 0 │ +│ 4 │ original │ 4 │ +│ 4.5 │ │ 0 │ +│ 5 │ │ 0 │ +│ 5.5 │ │ 0 │ +│ 7 │ original │ 7 │ +└─────┴──────────┴───────┘ +``` + +Same query after applying `INTERPOLATE`: + +``` sql +SELECT n, source, inter FROM ( + SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter + FROM numbers(10) WHERE number % 3 = 1 +) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS inter + 1); +``` + +Result: + +``` text +┌───n─┬─source───┬─inter─┐ +│ 0 │ │ 0 │ +│ 0.5 │ │ 0 │ +│ 1 │ original │ 1 │ +│ 1.5 │ │ 2 │ +│ 2 │ │ 3 │ +│ 2.5 │ │ 4 │ +│ 3 │ │ 5 │ +│ 3.5 │ │ 6 │ +│ 4 │ original │ 4 │ +│ 4.5 │ │ 5 │ +│ 5 │ │ 6 │ +│ 5.5 │ │ 7 │ +│ 7 │ original │ 7 │ +└─────┴──────────┴───────┘ +``` + [Original article](https://clickhouse.com/docs/en/sql-reference/statements/select/order-by/) diff --git a/docs/en/whats-new/security-changelog.md b/docs/en/whats-new/security-changelog.md index bcfeaa06e24..685f1c6d21d 100644 --- a/docs/en/whats-new/security-changelog.md +++ b/docs/en/whats-new/security-changelog.md @@ -2,6 +2,49 @@ toc_priority: 76 toc_title: Security Changelog --- +## Fixed in ClickHouse 21.10.2.15, 2021-10-18 {#fixed-in-clickhouse-release-21-10-2-215-2021-10-18} + +### CVE-2021-43304 {#cve-2021-43304} + +Heap buffer overflow in Clickhouse's LZ4 compression codec when parsing a malicious query. There is no verification that the copy operations in the LZ4::decompressImpl loop and especially the arbitrary copy operation wildCopy(op, ip, copy_end), don’t exceed the destination buffer’s limits. + +Credits: JFrog Security Research Team + +### CVE-2021-43305 {#cve-2021-43305} + +Heap buffer overflow in Clickhouse's LZ4 compression codec when parsing a malicious query. 
There is no verification that the copy operations in the LZ4::decompressImpl loop and especially the arbitrary copy operation wildCopy(op, ip, copy_end), don’t exceed the destination buffer’s limits. This issue is very similar to CVE-2021-43304, but the vulnerable copy operation is in a different wildCopy call. + +Credits: JFrog Security Research Team + +### CVE-2021-42387 {#cve-2021-42387} + +Heap out-of-bounds read in Clickhouse's LZ4 compression codec when parsing a malicious query. As part of the LZ4::decompressImpl() loop, a 16-bit unsigned user-supplied value ('offset') is read from the compressed data. The offset is later used in the length of a copy operation, without checking the upper bounds of the source of the copy operation. + +Credits: JFrog Security Research Team + +### CVE-2021-42388 {#cve-2021-42388} + +Heap out-of-bounds read in Clickhouse's LZ4 compression codec when parsing a malicious query. As part of the LZ4::decompressImpl() loop, a 16-bit unsigned user-supplied value ('offset') is read from the compressed data. The offset is later used in the length of a copy operation, without checking the lower bounds of the source of the copy operation. + +Credits: JFrog Security Research Team + +### CVE-2021-42389 {#cve-2021-42389} + +Divide-by-zero in Clickhouse's Delta compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0. + +Credits: JFrog Security Research Team + +### CVE-2021-42390 {#cve-2021-42390} + +Divide-by-zero in Clickhouse's DeltaDouble compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0. + +Credits: JFrog Security Research Team + +### CVE-2021-42391 {#cve-2021-42391} + +Divide-by-zero in Clickhouse's Gorilla compression codec when parsing a malicious query. The first byte of the compressed buffer is used in a modulo operation without being checked for 0. + +Credits: JFrog Security Research Team ## Fixed in ClickHouse 21.4.3.21, 2021-04-12 {#fixed-in-clickhouse-release-21-4-3-21-2021-04-12} diff --git a/docs/ja/development/developer-instruction.md b/docs/ja/development/developer-instruction.md index c95dc0e2ea4..48afc77237c 100644 --- a/docs/ja/development/developer-instruction.md +++ b/docs/ja/development/developer-instruction.md @@ -273,7 +273,7 @@ GitHubのUIでforkリポジトリに移動します。 ブランチで開発し プル要求は、作業がまだ完了していない場合でも作成できます。 この場合、単語を入れてください “WIP” (進行中の作業)タイトルの先頭に、それは後で変更することができます。 これは、変更の協調的なレビューと議論、および利用可能なすべてのテストの実行に役立ちます。 変更の簡単な説明を提供することが重要です。 -Yandexの従業員がタグであなたのPRにラベルを付けるとすぐにテストが開始されます “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour. +ClickHouseの従業員がタグであなたのPRにラベルを付けるとすぐにテストが開始されます “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour. 
システムは、プル要求用にClickHouseバイナリビルドを個別に準備します。 これらのビルドを取得するには “Details” 次のリンク “ClickHouse build check” 小切手のリストのエントリ。 そこには、ビルドへの直接リンクがあります。ClickHouseのdebパッケージは、本番サーバーにも展開できます(恐れがない場合)。 diff --git a/docs/ja/getting-started/playground.md b/docs/ja/getting-started/playground.md index 4e35096aa4b..01d7dd5b69f 100644 --- a/docs/ja/getting-started/playground.md +++ b/docs/ja/getting-started/playground.md @@ -5,58 +5,39 @@ toc_title: Playground # ClickHouse Playground {#clickhouse-playground} -!!! warning "Warning" - This service is deprecated and will be replaced in foreseeable future. +[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. +Several example datasets are available in Playground. -[ClickHouse Playground](https://play.clickhouse.com) では、サーバーやクラスタを設定することなく、即座にクエリを実行して ClickHouse を試すことができます。 -いくつかの例のデータセットは、Playground だけでなく、ClickHouse の機能を示すサンプルクエリとして利用可能です. また、 ClickHouse の LTS リリースで試すこともできます。 +You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). -任意の HTTP クライアントを使用してプレイグラウンドへのクエリを作成することができます。例えば[curl](https://curl.haxx.se)、[wget](https://www.gnu.org/software/wget/)、[JDBC](../interfaces/jdbc.md)または[ODBC](../interfaces/odbc.md)ドライバを使用して接続を設定します。 -ClickHouse をサポートするソフトウェア製品の詳細情報は[こちら](../interfaces/index.md)をご覧ください。 +## Credentials {#credentials} -## 資格情報 {#credentials} +| Parameter | Value | +|:--------------------|:-----------------------------------| +| HTTPS endpoint | `https://play.clickhouse.com:443/` | +| Native TCP endpoint | `play.clickhouse.com:9440` | +| User | `explorer` or `play` | +| Password | (empty) | -| パラメータ | 値 | -| :---------------------------- | :-------------------------------------- | -| HTTPS エンドポイント | `https://play-api.clickhouse.com:8443` | -| ネイティブ TCP エンドポイント | `play-api.clickhouse.com:9440` | -| ユーザ名 | `playgrounnd` | -| パスワード | `clickhouse` | +## Limitations {#limitations} +The queries are executed as a read-only user. It implies some limitations: -特定のClickHouseのリリースで試すために、追加のエンドポイントがあります。(ポートとユーザー/パスワードは上記と同じです)。 +- DDL queries are not allowed +- INSERT queries are not allowed -- 20.3 LTS: `play-api-v20-3.clickhouse.com` -- 19.14 LTS: `play-api-v19-14.clickhouse.com` +The service also have quotas on its usage. -!!! 
note "備考" -これらのエンドポイントはすべて、安全なTLS接続が必要です。 +## Examples {#examples} - -## 制限事項 {#limitations} - -クエリは読み取り専用のユーザとして実行されます。これにはいくつかの制限があります。 - -- DDL クエリは許可されていません。 -- INSERT クエリは許可されていません。 - -また、以下の設定がなされています。 - -- [max_result_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes) -- [max_result_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows) -- [result_overflow_mode=break](../operations/settings/query_complexity/#result-overflow-mode) -- [max_execution_time=60000](../operations/settings/query_complexity/#max-execution-time) - -## 例 {#examples} - -`curl` を用いて HTTPSエンドポイントへ接続する例: +HTTPS endpoint example with `curl`: ``` bash -curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets" +curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'" ``` -[CLI](../interfaces/cli.md) で TCP エンドポイントへ接続する例: +TCP endpoint example with [CLI](../interfaces/cli.md): ``` bash -clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" +clickhouse client --secure --host play.clickhouse.com --user explorer ``` diff --git a/docs/ja/introduction/adopters.md b/docs/ja/introduction/adopters.md index 6f878bf1dfe..3372bb74f12 100644 --- a/docs/ja/introduction/adopters.md +++ b/docs/ja/introduction/adopters.md @@ -27,7 +27,7 @@ toc_title: "\u30A2\u30C0\u30D7\u30BF\u30FC" | Cisco | ネットワーク | トラフィック分析 | — | — | [ライトニングトーク2019](https://youtu.be/-hI1vDR2oPY?t=5057) | | Citadel Securities | 金融 | — | — | — | [2019年の貢献](https://github.com/ClickHouse/ClickHouse/pull/4774) | | シティモービル | タクシー | 分析 | — | — | [ロシア語でのブログ投稿,月2020](https://habr.com/en/company/citymobil/blog/490660/) | -| ContentSquare | ウェブ分析 | 主な製品 | — | — | [フランス語でのブログ投稿,November2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | +| Contentsquare | ウェブ分析 | 主な製品 | — | — | [フランス語でのブログ投稿,November2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | | Cloudflare | CDN | トラフィック分析 | 36台のサーバー | — | [ブログ投稿,月2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [ブログ投稿,月2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | | コルネット | 分析 | 主な製品 | — | — | [2019年英語スライド](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | | CraiditX 氪信 | ファイナンスAI | 分析 | — | — | [2019年のスライド](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | diff --git a/tests/integration/test_host_ip_change/__init__.py b/docs/ja/sql-reference/aggregate-functions/reference/grouparraysorted.md similarity index 100% rename from tests/integration/test_host_ip_change/__init__.py rename to docs/ja/sql-reference/aggregate-functions/reference/grouparraysorted.md diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 964d39163d8..5b6740e88bb 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -72,11 +72,11 @@ ClickHouse не работает и не собирается на 32-битны Этот вариант не подходит для отправки изменений на сервер. 
Вы можете временно его использовать, а затем добавить ssh ключи и заменить адрес репозитория с помощью команды `git remote`. -Вы можете также добавить для своего локального репозитория адрес оригинального репозитория Яндекса, чтобы притягивать оттуда обновления: +Вы можете также добавить для своего локального репозитория адрес оригинального репозитория, чтобы притягивать оттуда обновления: git remote add upstream git@github.com:ClickHouse/ClickHouse.git -После этого, вы сможете добавлять в свой репозиторий обновления из репозитория Яндекса с помощью команды `git pull upstream master`. +После этого, вы сможете добавлять в свой репозиторий обновления из репозитория ClickHouse с помощью команды `git pull upstream master`. ### Работа с сабмодулями Git {#rabota-s-sabmoduliami-git} @@ -288,7 +288,7 @@ sudo ./llvm.sh 12 Pull request можно создать, даже если работа над задачей ещё не завершена. В этом случае, добавьте в его название слово «WIP» (work in progress). Название можно будет изменить позже. Это полезно для совместного просмотра и обсуждения изменений, а также для запуска всех имеющихся тестов. Введите краткое описание изменений - впоследствии, оно будет использовано для релизных changelog. -Тесты будут запущены, как только сотрудники Яндекса поставят для pull request тег «Can be tested». Результаты первых проверок (стиль кода) появятся уже через несколько минут. Результаты сборки появятся примерно через пол часа. Результаты основного набора тестов будут доступны в пределах часа. +Тесты будут запущены, как только сотрудники ClickHouse поставят для pull request тег «Can be tested». Результаты первых проверок (стиль кода) появятся уже через несколько минут. Результаты сборки появятся примерно через пол часа. Результаты основного набора тестов будут доступны в пределах часа. Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Clickhouse build check». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно). diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 3f140f85396..f2a13569c23 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -182,7 +182,7 @@ Marks numbers: 0 1 2 3 4 5 6 7 8 Разреженный индекс допускает чтение лишних строк. При чтении одного диапазона первичного ключа, может быть прочитано до `index_granularity * 2` лишних строк в каждом блоке данных. -Разреженный индекс почти всегда помещаеся в оперативную память и позволяет работать с очень большим количеством строк в таблицах. +Разреженный индекс почти всегда помещается в оперативную память и позволяет работать с очень большим количеством строк в таблицах. ClickHouse не требует уникального первичного ключа. Можно вставить много строк с одинаковым первичным ключом. @@ -678,7 +678,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); - `volume_name_N` — название тома. Названия томов должны быть уникальны. - `disk` — диск, находящийся внутри тома. - `max_data_part_size_bytes` — максимальный размер куска данных, который может находится на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. 
В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том. -- `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). +- `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). Для перемещения куски сортируются по размеру от большего к меньшему (по убыванию) и выбираются куски, совокупный размер которых достаточен для соблюдения условия `move_factor`, если совокупный размер всех партов недостаточен, будут перемещены все парты. - `prefer_not_to_merge` — Отключает слияние кусков данных, хранящихся на данном томе. Если данная настройка включена, то слияние данных, хранящихся на данном томе, не допускается. Это позволяет контролировать работу ClickHouse с медленными дисками. Примеры конфигураций: diff --git a/docs/ru/getting-started/playground.md b/docs/ru/getting-started/playground.md index d9f65e192b5..01d7dd5b69f 100644 --- a/docs/ru/getting-started/playground.md +++ b/docs/ru/getting-started/playground.md @@ -5,53 +5,39 @@ toc_title: Playground # ClickHouse Playground {#clickhouse-playground} -!!! warning "Warning" - This service is deprecated and will be replaced in foreseeable future. +[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. +Several example datasets are available in Playground. -[ClickHouse Playground](https://play.clickhouse.com) позволяет пользователям экспериментировать с ClickHouse, мгновенно выполняя запросы без настройки своего сервера или кластера. -В Playground доступны несколько тестовых массивов данных, а также примеры запросов, которые показывают возможности ClickHouse. Кроме того, вы можете выбрать LTS релиз ClickHouse, который хотите протестировать. +You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). -Вы можете отправлять запросы к Playground с помощью любого HTTP-клиента, например [curl](https://curl.haxx.se) или [wget](https://www.gnu.org/software/wget/), также можно установить соединение с помощью драйверов [JDBC](../interfaces/jdbc.md) или [ODBC](../interfaces/odbc.md). Более подробная информация о программных продуктах, поддерживающих ClickHouse, доступна [здесь](../interfaces/index.md). 
+## Credentials {#credentials} -## Параметры доступа {#credentials} +| Parameter | Value | +|:--------------------|:-----------------------------------| +| HTTPS endpoint | `https://play.clickhouse.com:443/` | +| Native TCP endpoint | `play.clickhouse.com:9440` | +| User | `explorer` or `play` | +| Password | (empty) | -| Параметр | Значение | -|:--------------------|:----------------------------------------| -| Конечная точка HTTPS| `https://play-api.clickhouse.com:8443` | -| Конечная точка TCP | `play-api.clickhouse.com:9440` | -| Пользователь | `playground` | -| Пароль | `clickhouse` | +## Limitations {#limitations} -Также можно подключаться к ClickHouse определённых релизов, чтобы протестировать их различия (порты и пользователь / пароль остаются неизменными): +The queries are executed as a read-only user. It implies some limitations: -- 20.3 LTS: `play-api-v20-3.clickhouse.com` -- 19.14 LTS: `play-api-v19-14.clickhouse.com` +- DDL queries are not allowed +- INSERT queries are not allowed -!!! note "Примечание" - Для всех этих конечных точек требуется безопасное соединение TLS. +The service also have quotas on its usage. -## Ограничения {#limitations} +## Examples {#examples} -Запросы выполняются под пользователем с правами `readonly`, для которого есть следующие ограничения: -- запрещены DDL запросы -- запрещены INSERT запросы - -Также установлены следующие опции: -- [max_result_bytes=10485760](../operations/settings/query-complexity.md#max-result-bytes) -- [max_result_rows=2000](../operations/settings/query-complexity.md#setting-max_result_rows) -- [result_overflow_mode=break](../operations/settings/query-complexity.md#result-overflow-mode) -- [max_execution_time=60000](../operations/settings/query-complexity.md#max-execution-time) - -## Примеры {#examples} - -Пример конечной точки HTTPS с `curl`: +HTTPS endpoint example with `curl`: ``` bash -curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets" +curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'" ``` -Пример конечной точки TCP с [CLI](../interfaces/cli.md): +TCP endpoint example with [CLI](../interfaces/cli.md): ``` bash -clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" +clickhouse client --secure --host play.clickhouse.com --user explorer ``` diff --git a/docs/ru/operations/clickhouse-keeper.md b/docs/ru/operations/clickhouse-keeper.md index 2f3f3c0f63c..fe0f7d12893 100644 --- a/docs/ru/operations/clickhouse-keeper.md +++ b/docs/ru/operations/clickhouse-keeper.md @@ -54,7 +54,7 @@ ClickHouse Keeper может использоваться как равноце - `auto_forwarding` — разрешить пересылку запросов на запись от последователей лидеру (по умолчанию: true). - `shutdown_timeout` — время ожидания завершения внутренних подключений и выключения, в миллисекундах (по умолчанию: 5000). - `startup_timeout` — время отключения сервера, если он не подключается к другим участникам кворума, в миллисекундах (по умолчанию: 30000). -- `four_letter_word_white_list` — список разрешенных 4-х буквенных команд (по умолчанию: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro"). +- `four_letter_word_allow_list` — список разрешенных 4-х буквенных команд (по умолчанию: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro"). Конфигурация кворума находится в `.` и содержит описание серверов. 
@@ -114,7 +114,7 @@ clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon ClickHouse Keeper также поддерживает 4-х буквенные команды, почти такие же, как у Zookeeper. Каждая команда состоит из 4-х символов, например, `mntr`, `stat` и т. д. Несколько интересных команд: `stat` предоставляет общую информацию о сервере и подключенных клиентах, а `srvr` и `cons` предоставляют расширенные сведения о сервере и подключениях соответственно. -У 4-х буквенных команд есть параметр для настройки разрешенного списка `four_letter_word_white_list`, который имеет значение по умолчанию "conf,cons,crst,envi,ruok,srst,srvr,stat, wchc,wchs,dirs,mntr,isro". +У 4-х буквенных команд есть параметр для настройки разрешенного списка `four_letter_word_allow_list`, который имеет значение по умолчанию "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro". Вы можете отправлять команды в ClickHouse Keeper через telnet или nc на порт для клиента. @@ -194,7 +194,7 @@ Server stats reset. ``` server_id=1 tcp_port=2181 -four_letter_word_white_list=* +four_letter_word_allow_list=* log_storage_path=./coordination/logs snapshot_storage_path=./coordination/snapshots max_requests_batch_size=100 diff --git a/docs/ru/sql-reference/data-types/special-data-types/nothing.md b/docs/ru/sql-reference/data-types/special-data-types/nothing.md index 30d425461e1..7a58d52573f 100644 --- a/docs/ru/sql-reference/data-types/special-data-types/nothing.md +++ b/docs/ru/sql-reference/data-types/special-data-types/nothing.md @@ -5,9 +5,9 @@ toc_title: Nothing # Nothing {#nothing} -Этот тип данных предназначен только для того, чтобы представлять [NULL](../../../sql-reference/data-types/special-data-types/nothing.md), т.е. отсутствие значения. +Этот тип данных предназначен только для того, чтобы представлять [NULL](../../../sql-reference/syntax.md#null-literal), т.е. отсутствие значения. -Невозможно создать значение типа `Nothing`, поэтому он используется там, где значение не подразумевается. Например, `NULL` записывается как `Nullable(Nothing)` ([Nullable](../../../sql-reference/data-types/special-data-types/nothing.md) — это тип данных, позволяющий хранить `NULL` в таблицах). Также тип `Nothing` используется для обозначения пустых массивов: +Невозможно создать значение типа `Nothing`, поэтому он используется там, где значение не подразумевается. Например, `NULL` записывается как `Nullable(Nothing)` ([Nullable](../../../sql-reference/data-types/nullable.md) — это тип данных, позволяющий хранить `NULL` в таблицах). Также тип `Nothing` используется для обозначения пустых массивов: ``` sql SELECT toTypeName(Array()) diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index b9c2a4f0f0b..48cce437b8d 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -110,9 +110,9 @@ SELECT x, toTypeName(x) FROM t1; ### EPHEMERAL {#ephemeral} -`EPHEMERAL expr` +`EPHEMERAL [expr]` -Эфемерное выражение. Такой столбец не хранится в таблице и не может быть получен в запросе SELECT, но на него можно ссылаться в выражениях по умолчанию запроса CREATE. +Эфемерное выражение. Такой столбец не хранится в таблице и не может быть получен в запросе SELECT, но на него можно ссылаться в выражениях по умолчанию запроса CREATE. Если значение по умолчанию `expr` не указано, то тип колонки должен быть специфицирован. INSERT без списка столбцов игнорирует этот столбец, таким образом сохраняется инвариант - т.е. 
дамп, полученный путём `SELECT *`, можно вставить обратно в таблицу INSERT-ом без указания списка столбцов. ### ALIAS {#alias} diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md index 4a409dc7743..fead3c11060 100644 --- a/docs/ru/sql-reference/statements/select/index.md +++ b/docs/ru/sql-reference/statements/select/index.md @@ -20,7 +20,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list [WHERE expr] [GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS] [HAVING expr] -[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr] +[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr] [INTERPOLATE [(expr_list)]] [LIMIT [offset_value, ]n BY columns] [LIMIT [n, ]m] [WITH TIES] [SETTINGS ...] diff --git a/docs/ru/sql-reference/statements/select/order-by.md b/docs/ru/sql-reference/statements/select/order-by.md index 3f52b260423..e293e62e34c 100644 --- a/docs/ru/sql-reference/statements/select/order-by.md +++ b/docs/ru/sql-reference/statements/select/order-by.md @@ -280,6 +280,7 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en'; ```sql ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr] +[INTERPOLATE [(col [AS expr], ... colN [AS exprN])]] ``` `WITH FILL` может быть применен к полям с числовыми (все разновидности float, int, decimal) или временными (все разновидности Date, DateTime) типами. В случае применения к полям типа `String` недостающие значения заполняются пустой строкой. @@ -289,6 +290,8 @@ ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_ Когда `STEP const_numeric_expr` не указан, тогда используется `1.0` для числовых типов, `1 день` для типа Date и `1 секунда` для типа DateTime. +`INTERPOLATE` может быть применен к колонкам, не участвующим в `ORDER BY WITH FILL`. Такие колонки заполняются значениями, вычисляемыми применением `expr` к предыдущему значению. Если `expr` опущен, то колонка заполняется предыдущим значением. Если список колонок не указан, то включаются все разрешенные колонки. 
+ Пример запроса без использования `WITH FILL`: ```sql SELECT n, source FROM ( @@ -395,3 +398,58 @@ ORDER BY │ 1970-03-12 │ 1970-01-08 │ original │ └────────────┴────────────┴──────────┘ ``` + +Пример запроса без `INTERPOLATE`: + +``` sql +SELECT n, source, inter FROM ( + SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter + FROM numbers(10) WHERE number % 3 = 1 +) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5; +``` + +Результат: +``` text +┌───n─┬─source───┬─inter─┐ +│ 0 │ │ 0 │ +│ 0.5 │ │ 0 │ +│ 1 │ original │ 1 │ +│ 1.5 │ │ 0 │ +│ 2 │ │ 0 │ +│ 2.5 │ │ 0 │ +│ 3 │ │ 0 │ +│ 3.5 │ │ 0 │ +│ 4 │ original │ 4 │ +│ 4.5 │ │ 0 │ +│ 5 │ │ 0 │ +│ 5.5 │ │ 0 │ +│ 7 │ original │ 7 │ +└─────┴──────────┴───────┘ +``` + +Тот же запрос с `INTERPOLATE`: + +``` sql +SELECT n, source, inter FROM ( + SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter + FROM numbers(10) WHERE number % 3 = 1 +) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS inter + 1); +``` + +Результат: +``` text +┌───n─┬─source───┬─inter─┐ +│ 0 │ │ 0 │ +│ 0.5 │ │ 0 │ +│ 1 │ original │ 1 │ +│ 1.5 │ │ 2 │ +│ 2 │ │ 3 │ +│ 2.5 │ │ 4 │ +│ 3 │ │ 5 │ +│ 3.5 │ │ 6 │ +│ 4 │ original │ 4 │ +│ 4.5 │ │ 5 │ +│ 5 │ │ 6 │ +│ 5.5 │ │ 7 │ +│ 7 │ original │ 7 │ +└─────┴──────────┴───────┘ diff --git a/docs/ru/sql-reference/table-functions/postgresql.md b/docs/ru/sql-reference/table-functions/postgresql.md index a8ae7cfb80b..e61ca69d78c 100644 --- a/docs/ru/sql-reference/table-functions/postgresql.md +++ b/docs/ru/sql-reference/table-functions/postgresql.md @@ -126,7 +126,7 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32) **См. также** -- [Движок таблиц PostgreSQL](../../sql-reference/table-functions/postgresql.md) +- [Движок таблиц PostgreSQL](../../engines/table-engines/integrations/postgresql.md) - [Использование PostgreSQL как источника данных для внешнего словаря](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql) [Оригинальная статья](https://clickhouse.com/docs/ru/sql-reference/table-functions/postgresql/) diff --git a/docs/tools/amp.py b/docs/tools/amp.py index 22417407946..584a40c4bba 100644 --- a/docs/tools/amp.py +++ b/docs/tools/amp.py @@ -15,24 +15,24 @@ import website def prepare_amp_html(lang, args, root, site_temp, main_site_dir): src_path = root - src_index = os.path.join(src_path, 'index.html') + src_index = os.path.join(src_path, "index.html") rel_path = os.path.relpath(src_path, site_temp) - dst_path = os.path.join(main_site_dir, rel_path, 'amp') - dst_index = os.path.join(dst_path, 'index.html') + dst_path = os.path.join(main_site_dir, rel_path, "amp") + dst_index = os.path.join(dst_path, "index.html") - logging.debug(f'Generating AMP version for {rel_path} ({lang})') + logging.debug(f"Generating AMP version for {rel_path} ({lang})") os.makedirs(dst_path) - with open(src_index, 'r') as f: + with open(src_index, "r") as f: content = f.read() - css_in = ' '.join(website.get_css_in(args)) + css_in = " ".join(website.get_css_in(args)) command = f"purifycss --min {css_in} '{src_index}'" logging.debug(command) - inline_css = subprocess.check_output(command, shell=True).decode('utf-8') - inline_css = inline_css.replace('!important', '').replace('/*!', '/*') + inline_css = subprocess.check_output(command, shell=True).decode("utf-8") + inline_css = inline_css.replace("!important", "").replace("/*!", "/*") inline_css = cssmin.cssmin(inline_css) - content = content.replace('CUSTOM_CSS_PLACEHOLDER', 
inline_css) + content = content.replace("CUSTOM_CSS_PLACEHOLDER", inline_css) - with open(dst_index, 'w') as f: + with open(dst_index, "w") as f: f.write(content) return dst_index @@ -40,15 +40,12 @@ def prepare_amp_html(lang, args, root, site_temp, main_site_dir): def build_amp(lang, args, cfg): # AMP docs: https://amp.dev/documentation/ - logging.info(f'Building AMP version for {lang}') + logging.info(f"Building AMP version for {lang}") with util.temp_dir() as site_temp: - extra = cfg.data['extra'] - main_site_dir = cfg.data['site_dir'] - extra['is_amp'] = True - cfg.load_dict({ - 'site_dir': site_temp, - 'extra': extra - }) + extra = cfg.data["extra"] + main_site_dir = cfg.data["site_dir"] + extra["is_amp"] = True + cfg.load_dict({"site_dir": site_temp, "extra": extra}) try: mkdocs.commands.build.build(cfg) @@ -60,50 +57,49 @@ def build_amp(lang, args, cfg): paths = [] for root, _, filenames in os.walk(site_temp): - if 'index.html' in filenames: - paths.append(prepare_amp_html(lang, args, root, site_temp, main_site_dir)) - logging.info(f'Finished building AMP version for {lang}') + if "index.html" in filenames: + paths.append( + prepare_amp_html(lang, args, root, site_temp, main_site_dir) + ) + logging.info(f"Finished building AMP version for {lang}") def html_to_amp(content): - soup = bs4.BeautifulSoup( - content, - features='html.parser' - ) + soup = bs4.BeautifulSoup(content, features="html.parser") for tag in soup.find_all(): - if tag.attrs.get('id') == 'tostring': - tag.attrs['id'] = '_tostring' - if tag.name == 'img': - tag.name = 'amp-img' - tag.attrs['layout'] = 'responsive' - src = tag.attrs['src'] - if not (src.startswith('/') or src.startswith('http')): - tag.attrs['src'] = f'../{src}' - if not tag.attrs.get('width'): - tag.attrs['width'] = '640' - if not tag.attrs.get('height'): - tag.attrs['height'] = '320' - if tag.name == 'iframe': - tag.name = 'amp-iframe' - tag.attrs['layout'] = 'responsive' - del tag.attrs['alt'] - del tag.attrs['allowfullscreen'] - if not tag.attrs.get('width'): - tag.attrs['width'] = '640' - if not tag.attrs.get('height'): - tag.attrs['height'] = '320' - elif tag.name == 'a': - href = tag.attrs.get('href') + if tag.attrs.get("id") == "tostring": + tag.attrs["id"] = "_tostring" + if tag.name == "img": + tag.name = "amp-img" + tag.attrs["layout"] = "responsive" + src = tag.attrs["src"] + if not (src.startswith("/") or src.startswith("http")): + tag.attrs["src"] = f"../{src}" + if not tag.attrs.get("width"): + tag.attrs["width"] = "640" + if not tag.attrs.get("height"): + tag.attrs["height"] = "320" + if tag.name == "iframe": + tag.name = "amp-iframe" + tag.attrs["layout"] = "responsive" + del tag.attrs["alt"] + del tag.attrs["allowfullscreen"] + if not tag.attrs.get("width"): + tag.attrs["width"] = "640" + if not tag.attrs.get("height"): + tag.attrs["height"] = "320" + elif tag.name == "a": + href = tag.attrs.get("href") if href: - if not (href.startswith('/') or href.startswith('http')): - if '#' in href: - href, anchor = href.split('#') + if not (href.startswith("/") or href.startswith("http")): + if "#" in href: + href, anchor = href.split("#") else: anchor = None - href = f'../{href}amp/' + href = f"../{href}amp/" if anchor: - href = f'{href}#{anchor}' - tag.attrs['href'] = href + href = f"{href}#{anchor}" + tag.attrs["href"] = href content = str(soup) return website.minify_html(content) diff --git a/docs/tools/blog.py b/docs/tools/blog.py index b58523504a3..d1fc540d8bf 100644 --- a/docs/tools/blog.py +++ b/docs/tools/blog.py @@ -17,54 +17,52 @@ 
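
For reference, the `img` branch of `html_to_amp` above amounts to renaming the tag and filling in the defaults AMP needs for a responsive layout. A self-contained sketch of just that step, assuming only `bs4` is available and skipping the iframe and link handling:

```python
import bs4


# Minimal sketch of the <img> -> <amp-img> rewrite done by html_to_amp() above;
# relative sources get "../" because the AMP copy lives one level deeper (<page>/amp/).
def img_to_amp(content):
    soup = bs4.BeautifulSoup(content, features="html.parser")
    for tag in soup.find_all("img"):
        tag.name = "amp-img"
        tag.attrs["layout"] = "responsive"
        src = tag.attrs.get("src", "")
        if src and not (src.startswith("/") or src.startswith("http")):
            tag.attrs["src"] = f"../{src}"
        if not tag.attrs.get("width"):
            tag.attrs["width"] = "640"   # responsive amp-img needs explicit dimensions
        if not tag.attrs.get("height"):
            tag.attrs["height"] = "320"
    return str(soup)


print(img_to_amp('<img src="images/logo.png" alt="logo">'))
# the tag comes back as an <amp-img> with layout, width and height filled in
```
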
import util def build_for_lang(lang, args): - logging.info(f'Building {lang} blog') + logging.info(f"Building {lang} blog") try: theme_cfg = { - 'name': None, - 'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir), - 'language': lang, - 'direction': 'ltr', - 'static_templates': ['404.html'], - 'extra': { - 'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching - } + "name": None, + "custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir), + "language": lang, + "direction": "ltr", + "static_templates": ["404.html"], + "extra": { + "now": int( + time.mktime(datetime.datetime.now().timetuple()) + ) # TODO better way to avoid caching + }, } # the following list of languages is sorted according to # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers - languages = { - 'en': 'English' - } + languages = {"en": "English"} - site_names = { - 'en': 'ClickHouse Blog' - } + site_names = {"en": "ClickHouse Blog"} assert len(site_names) == len(languages) site_dir = os.path.join(args.blog_output_dir, lang) - plugins = ['macros'] + plugins = ["macros"] if args.htmlproofer: - plugins.append('htmlproofer') + plugins.append("htmlproofer") - website_url = 'https://clickhouse.com' - site_name = site_names.get(lang, site_names['en']) + website_url = "https://clickhouse.com" + site_name = site_names.get(lang, site_names["en"]) blog_nav, post_meta = nav.build_blog_nav(lang, args) raw_config = dict( site_name=site_name, - site_url=f'{website_url}/blog/{lang}/', + site_url=f"{website_url}/blog/{lang}/", docs_dir=os.path.join(args.blog_dir, lang), site_dir=site_dir, strict=True, theme=theme_cfg, nav=blog_nav, - copyright='©2016–2022 ClickHouse, Inc.', + copyright="©2016–2022 ClickHouse, Inc.", use_directory_urls=True, - repo_name='ClickHouse/ClickHouse', - repo_url='https://github.com/ClickHouse/ClickHouse/', - edit_uri=f'edit/master/website/blog/{lang}', + repo_name="ClickHouse/ClickHouse", + repo_url="https://github.com/ClickHouse/ClickHouse/", + edit_uri=f"edit/master/website/blog/{lang}", markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, plugins=plugins, extra=dict( @@ -75,12 +73,12 @@ def build_for_lang(lang, args): website_url=website_url, events=args.events, languages=languages, - includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), + includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"), is_amp=False, is_blog=True, post_meta=post_meta, - today=datetime.date.today().isoformat() - ) + today=datetime.date.today().isoformat(), + ), ) cfg = config.load_config(**raw_config) @@ -89,21 +87,28 @@ def build_for_lang(lang, args): redirects.build_blog_redirects(args) env = util.init_jinja2_env(args) - with open(os.path.join(args.website_dir, 'templates', 'blog', 'rss.xml'), 'rb') as f: - rss_template_string = f.read().decode('utf-8').strip() + with open( + os.path.join(args.website_dir, "templates", "blog", "rss.xml"), "rb" + ) as f: + rss_template_string = f.read().decode("utf-8").strip() rss_template = env.from_string(rss_template_string) - with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f: - f.write(rss_template.render({'config': raw_config})) + with open(os.path.join(args.blog_output_dir, lang, "rss.xml"), "w") as f: + f.write(rss_template.render({"config": raw_config})) - logging.info(f'Finished building {lang} blog') + logging.info(f"Finished building {lang} blog") except exceptions.ConfigurationError as e: - raise SystemExit('\n' + str(e)) + 
raise SystemExit("\n" + str(e)) def build_blog(args): tasks = [] - for lang in args.blog_lang.split(','): + for lang in args.blog_lang.split(","): if lang: - tasks.append((lang, args,)) + tasks.append( + ( + lang, + args, + ) + ) util.run_function_in_parallel(build_for_lang, tasks, threads=False) diff --git a/docs/tools/build.py b/docs/tools/build.py index e4f6718699a..612be0229d3 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -30,76 +30,76 @@ import website from cmake_in_clickhouse_generator import generate_cmake_flags_files + class ClickHouseMarkdown(markdown.extensions.Extension): class ClickHousePreprocessor(markdown.util.Processor): def run(self, lines): for line in lines: - if '' not in line: + if "" not in line: yield line def extendMarkdown(self, md): - md.preprocessors.register(self.ClickHousePreprocessor(), 'clickhouse_preprocessor', 31) + md.preprocessors.register( + self.ClickHousePreprocessor(), "clickhouse_preprocessor", 31 + ) markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown def build_for_lang(lang, args): - logging.info(f'Building {lang} docs') - os.environ['SINGLE_PAGE'] = '0' + logging.info(f"Building {lang} docs") + os.environ["SINGLE_PAGE"] = "0" try: theme_cfg = { - 'name': None, - 'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir), - 'language': lang, - 'direction': 'rtl' if lang == 'fa' else 'ltr', - 'static_templates': ['404.html'], - 'extra': { - 'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching - } + "name": None, + "custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir), + "language": lang, + "direction": "rtl" if lang == "fa" else "ltr", + "static_templates": ["404.html"], + "extra": { + "now": int( + time.mktime(datetime.datetime.now().timetuple()) + ) # TODO better way to avoid caching + }, } # the following list of languages is sorted according to # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers - languages = { - 'en': 'English', - 'zh': '中文', - 'ru': 'Русский', - 'ja': '日本語' - } + languages = {"en": "English", "zh": "中文", "ru": "Русский", "ja": "日本語"} site_names = { - 'en': 'ClickHouse %s Documentation', - 'zh': 'ClickHouse文档 %s', - 'ru': 'Документация ClickHouse %s', - 'ja': 'ClickHouseドキュメント %s' + "en": "ClickHouse %s Documentation", + "zh": "ClickHouse文档 %s", + "ru": "Документация ClickHouse %s", + "ja": "ClickHouseドキュメント %s", } assert len(site_names) == len(languages) site_dir = os.path.join(args.docs_output_dir, lang) - plugins = ['macros'] + plugins = ["macros"] if args.htmlproofer: - plugins.append('htmlproofer') + plugins.append("htmlproofer") - website_url = 'https://clickhouse.com' - site_name = site_names.get(lang, site_names['en']) % '' - site_name = site_name.replace(' ', ' ') + website_url = "https://clickhouse.com" + site_name = site_names.get(lang, site_names["en"]) % "" + site_name = site_name.replace(" ", " ") raw_config = dict( site_name=site_name, - site_url=f'{website_url}/docs/{lang}/', + site_url=f"{website_url}/docs/{lang}/", docs_dir=os.path.join(args.docs_dir, lang), site_dir=site_dir, strict=True, theme=theme_cfg, - copyright='©2016–2022 ClickHouse, Inc.', + copyright="©2016–2022 ClickHouse, Inc.", use_directory_urls=True, - repo_name='ClickHouse/ClickHouse', - repo_url='https://github.com/ClickHouse/ClickHouse/', - edit_uri=f'edit/master/docs/{lang}', + repo_name="ClickHouse/ClickHouse", + repo_url="https://github.com/ClickHouse/ClickHouse/", + edit_uri=f"edit/master/docs/{lang}", 
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, plugins=plugins, extra=dict( @@ -111,16 +111,16 @@ def build_for_lang(lang, args): website_url=website_url, events=args.events, languages=languages, - includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), + includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"), is_amp=False, - is_blog=False - ) + is_blog=False, + ), ) # Clean to be safe if last build finished abnormally single_page.remove_temporary_files(lang, args) - raw_config['nav'] = nav.build_docs_nav(lang, args) + raw_config["nav"] = nav.build_docs_nav(lang, args) cfg = config.load_config(**raw_config) @@ -131,21 +131,28 @@ def build_for_lang(lang, args): amp.build_amp(lang, args, cfg) if not args.skip_single_page: - single_page.build_single_page_version(lang, args, raw_config.get('nav'), cfg) + single_page.build_single_page_version( + lang, args, raw_config.get("nav"), cfg + ) mdx_clickhouse.PatchedMacrosPlugin.disabled = False - logging.info(f'Finished building {lang} docs') + logging.info(f"Finished building {lang} docs") except exceptions.ConfigurationError as e: - raise SystemExit('\n' + str(e)) + raise SystemExit("\n" + str(e)) def build_docs(args): tasks = [] - for lang in args.lang.split(','): + for lang in args.lang.split(","): if lang: - tasks.append((lang, args,)) + tasks.append( + ( + lang, + args, + ) + ) util.run_function_in_parallel(build_for_lang, tasks, threads=False) redirects.build_docs_redirects(args) @@ -171,56 +178,64 @@ def build(args): redirects.build_static_redirects(args) -if __name__ == '__main__': - os.chdir(os.path.join(os.path.dirname(__file__), '..')) +if __name__ == "__main__": + os.chdir(os.path.join(os.path.dirname(__file__), "..")) # A root path to ClickHouse source code. - src_dir = '..' + src_dir = ".." 
- website_dir = os.path.join(src_dir, 'website') + website_dir = os.path.join(src_dir, "website") arg_parser = argparse.ArgumentParser() - arg_parser.add_argument('--lang', default='en,ru,zh,ja') - arg_parser.add_argument('--blog-lang', default='en') - arg_parser.add_argument('--docs-dir', default='.') - arg_parser.add_argument('--theme-dir', default=website_dir) - arg_parser.add_argument('--website-dir', default=website_dir) - arg_parser.add_argument('--src-dir', default=src_dir) - arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog')) - arg_parser.add_argument('--output-dir', default='build') - arg_parser.add_argument('--nav-limit', type=int, default='0') - arg_parser.add_argument('--skip-multi-page', action='store_true') - arg_parser.add_argument('--skip-single-page', action='store_true') - arg_parser.add_argument('--skip-amp', action='store_true') - arg_parser.add_argument('--skip-website', action='store_true') - arg_parser.add_argument('--skip-blog', action='store_true') - arg_parser.add_argument('--skip-git-log', action='store_true') - arg_parser.add_argument('--skip-docs', action='store_true') - arg_parser.add_argument('--test-only', action='store_true') - arg_parser.add_argument('--minify', action='store_true') - arg_parser.add_argument('--htmlproofer', action='store_true') - arg_parser.add_argument('--no-docs-macros', action='store_true') - arg_parser.add_argument('--save-raw-single-page', type=str) - arg_parser.add_argument('--livereload', type=int, default='0') - arg_parser.add_argument('--verbose', action='store_true') + arg_parser.add_argument("--lang", default="en,ru,zh,ja") + arg_parser.add_argument("--blog-lang", default="en") + arg_parser.add_argument("--docs-dir", default=".") + arg_parser.add_argument("--theme-dir", default=website_dir) + arg_parser.add_argument("--website-dir", default=website_dir) + arg_parser.add_argument("--src-dir", default=src_dir) + arg_parser.add_argument("--blog-dir", default=os.path.join(website_dir, "blog")) + arg_parser.add_argument("--output-dir", default="build") + arg_parser.add_argument("--nav-limit", type=int, default="0") + arg_parser.add_argument("--skip-multi-page", action="store_true") + arg_parser.add_argument("--skip-single-page", action="store_true") + arg_parser.add_argument("--skip-amp", action="store_true") + arg_parser.add_argument("--skip-website", action="store_true") + arg_parser.add_argument("--skip-blog", action="store_true") + arg_parser.add_argument("--skip-git-log", action="store_true") + arg_parser.add_argument("--skip-docs", action="store_true") + arg_parser.add_argument("--test-only", action="store_true") + arg_parser.add_argument("--minify", action="store_true") + arg_parser.add_argument("--htmlproofer", action="store_true") + arg_parser.add_argument("--no-docs-macros", action="store_true") + arg_parser.add_argument("--save-raw-single-page", type=str) + arg_parser.add_argument("--livereload", type=int, default="0") + arg_parser.add_argument("--verbose", action="store_true") args = arg_parser.parse_args() args.minify = False # TODO remove logging.basicConfig( - level=logging.DEBUG if args.verbose else logging.INFO, - stream=sys.stderr + level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr ) - logging.getLogger('MARKDOWN').setLevel(logging.INFO) + logging.getLogger("MARKDOWN").setLevel(logging.INFO) - args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs') - args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), 'blog') + 
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), "docs") + args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), "blog") from github import get_events - args.rev = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip() - args.rev_short = subprocess.check_output('git rev-parse --short HEAD', shell=True).decode('utf-8').strip() - args.rev_url = f'https://github.com/ClickHouse/ClickHouse/commit/{args.rev}' + + args.rev = ( + subprocess.check_output("git rev-parse HEAD", shell=True) + .decode("utf-8") + .strip() + ) + args.rev_short = ( + subprocess.check_output("git rev-parse --short HEAD", shell=True) + .decode("utf-8") + .strip() + ) + args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}" args.events = get_events(args) if args.test_only: @@ -233,18 +248,20 @@ if __name__ == '__main__': mdx_clickhouse.PatchedMacrosPlugin.skip_git_log = True from build import build + build(args) if args.livereload: - new_args = [arg for arg in sys.argv if not arg.startswith('--livereload')] - new_args = sys.executable + ' ' + ' '.join(new_args) + new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")] + new_args = sys.executable + " " + " ".join(new_args) server = livereload.Server() - server.watch(args.docs_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True)) - server.watch(args.website_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True)) - server.serve( - root=args.output_dir, - host='0.0.0.0', - port=args.livereload + server.watch( + args.docs_dir + "**/*", livereload.shell(new_args, cwd="tools", shell=True) ) + server.watch( + args.website_dir + "**/*", + livereload.shell(new_args, cwd="tools", shell=True), + ) + server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload) sys.exit(0) diff --git a/docs/tools/cmake_in_clickhouse_generator.py b/docs/tools/cmake_in_clickhouse_generator.py index aa4cbbddd18..9bbc94fd206 100644 --- a/docs/tools/cmake_in_clickhouse_generator.py +++ b/docs/tools/cmake_in_clickhouse_generator.py @@ -6,11 +6,13 @@ from typing import TextIO, List, Tuple, Optional, Dict Entity = Tuple[str, str, str] # https://regex101.com/r/R6iogw/12 -cmake_option_regex: str = r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$" +cmake_option_regex: str = ( + r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$" +) ch_master_url: str = "https://github.com/clickhouse/clickhouse/blob/master/" -name_str: str = "[`{name}`](" + ch_master_url + "{path}#L{line})" +name_str: str = '[`{name}`](' + ch_master_url + "{path}#L{line})" default_anchor_str: str = "[`{name}`](#{anchor})" comment_var_regex: str = r"\${(.+)}" @@ -27,11 +29,15 @@ entities: Dict[str, Tuple[str, str]] = {} def make_anchor(t: str) -> str: - return "".join(["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"]) + return "".join( + ["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"] + ) + def process_comment(comment: str) -> str: return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE) + def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None: (line, comment) = line_comment (name, description, default) = entity @@ -47,22 +53,22 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No formatted_default: str = "`" + default + "`" formatted_name: str = name_str.format( - anchor=make_anchor(name), - name=name, - path=path, - line=line) + 
anchor=make_anchor(name), name=name, path=path, line=line + ) formatted_description: str = "".join(description.split("\n")) formatted_comment: str = process_comment(comment) formatted_entity: str = "| {} | {} | {} | {} |".format( - formatted_name, formatted_default, formatted_description, formatted_comment) + formatted_name, formatted_default, formatted_description, formatted_comment + ) entities[name] = path, formatted_entity + def process_file(root_path: str, file_path: str, file_name: str) -> None: - with open(os.path.join(file_path, file_name), 'r') as cmake_file: + with open(os.path.join(file_path, file_name), "r") as cmake_file: contents: str = cmake_file.read() def get_line_and_comment(target: str) -> Tuple[int, str]: @@ -70,10 +76,10 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None: comment: str = "" for n, line in enumerate(contents_list): - if 'option' not in line.lower() or target not in line: + if "option" not in line.lower() or target not in line: continue - for maybe_comment_line in contents_list[n - 1::-1]: + for maybe_comment_line in contents_list[n - 1 :: -1]: if not re.match("\s*#\s*", maybe_comment_line): break @@ -82,16 +88,22 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None: # line numbering starts with 1 return n + 1, comment - matches: Optional[List[Entity]] = re.findall(cmake_option_regex, contents, re.MULTILINE) + matches: Optional[List[Entity]] = re.findall( + cmake_option_regex, contents, re.MULTILINE + ) - - file_rel_path_with_name: str = os.path.join(file_path[len(root_path):], file_name) - if file_rel_path_with_name.startswith('/'): + file_rel_path_with_name: str = os.path.join( + file_path[len(root_path) :], file_name + ) + if file_rel_path_with_name.startswith("/"): file_rel_path_with_name = file_rel_path_with_name[1:] if matches: for entity in matches: - build_entity(file_rel_path_with_name, entity, get_line_and_comment(entity[0])) + build_entity( + file_rel_path_with_name, entity, get_line_and_comment(entity[0]) + ) + def process_folder(root_path: str, name: str) -> None: for root, _, files in os.walk(os.path.join(root_path, name)): @@ -99,12 +111,19 @@ def process_folder(root_path: str, name: str) -> None: if f == "CMakeLists.txt" or ".cmake" in f: process_file(root_path, root, f) -def generate_cmake_flags_files() -> None: - root_path: str = os.path.join(os.path.dirname(__file__), '..', '..') - output_file_name: str = os.path.join(root_path, "docs/en/development/cmake-in-clickhouse.md") - header_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_header.md") - footer_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_footer.md") +def generate_cmake_flags_files() -> None: + root_path: str = os.path.join(os.path.dirname(__file__), "..", "..") + + output_file_name: str = os.path.join( + root_path, "docs/en/development/cmake-in-clickhouse.md" + ) + header_file_name: str = os.path.join( + root_path, "docs/_includes/cmake_in_clickhouse_header.md" + ) + footer_file_name: str = os.path.join( + root_path, "docs/_includes/cmake_in_clickhouse_footer.md" + ) process_file(root_path, root_path, "CMakeLists.txt") process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt") @@ -127,8 +146,10 @@ def generate_cmake_flags_files() -> None: f.write(entities[k][1] + "\n") ignored_keys.append(k) - f.write("\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n" + - table_header) + f.write( + 
"\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n" + + table_header + ) for k in sorted_keys: if k.startswith("ENABLE_") and ".cmake" in entities[k][0]: @@ -143,15 +164,18 @@ def generate_cmake_flags_files() -> None: with open(footer_file_name, "r") as footer: f.write(footer.read()) - other_languages = ["docs/ja/development/cmake-in-clickhouse.md", - "docs/zh/development/cmake-in-clickhouse.md", - "docs/ru/development/cmake-in-clickhouse.md"] + other_languages = [ + "docs/ja/development/cmake-in-clickhouse.md", + "docs/zh/development/cmake-in-clickhouse.md", + "docs/ru/development/cmake-in-clickhouse.md", + ] for lang in other_languages: other_file_name = os.path.join(root_path, lang) if os.path.exists(other_file_name): - os.unlink(other_file_name) + os.unlink(other_file_name) os.symlink(output_file_name, other_file_name) -if __name__ == '__main__': + +if __name__ == "__main__": generate_cmake_flags_files() diff --git a/docs/tools/easy_diff.py b/docs/tools/easy_diff.py index 22d305d3da3..14e3ca91776 100755 --- a/docs/tools/easy_diff.py +++ b/docs/tools/easy_diff.py @@ -8,7 +8,7 @@ import contextlib from git import cmd from tempfile import NamedTemporaryFile -SCRIPT_DESCRIPTION = ''' +SCRIPT_DESCRIPTION = """ usage: ./easy_diff.py language/document path Show the difference between a language document and an English document. @@ -53,16 +53,16 @@ SCRIPT_DESCRIPTION = ''' OPTIONS: -h, --help show this help message and exit --no-pager use stdout as difference result output -''' +""" SCRIPT_PATH = os.path.abspath(__file__) -CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), '..', '..') +CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), "..", "..") SCRIPT_COMMAND_EXECUTOR = cmd.Git(CLICKHOUSE_REPO_HOME) SCRIPT_COMMAND_PARSER = argparse.ArgumentParser(add_help=False) -SCRIPT_COMMAND_PARSER.add_argument('path', type=bytes, nargs='?', default=None) -SCRIPT_COMMAND_PARSER.add_argument('--no-pager', action='store_true', default=False) -SCRIPT_COMMAND_PARSER.add_argument('-h', '--help', action='store_true', default=False) +SCRIPT_COMMAND_PARSER.add_argument("path", type=bytes, nargs="?", default=None) +SCRIPT_COMMAND_PARSER.add_argument("--no-pager", action="store_true", default=False) +SCRIPT_COMMAND_PARSER.add_argument("-h", "--help", action="store_true", default=False) def execute(commands): @@ -70,19 +70,41 @@ def execute(commands): def get_hash(file_name): - return execute(['git', 'log', '-n', '1', '--pretty=format:"%H"', file_name]) + return execute(["git", "log", "-n", "1", '--pretty=format:"%H"', file_name]) def diff_file(reference_file, working_file, out): if not os.path.exists(reference_file): - raise RuntimeError('reference file [' + os.path.abspath(reference_file) + '] is not exists.') + raise RuntimeError( + "reference file [" + os.path.abspath(reference_file) + "] is not exists." 
+ ) if os.path.islink(working_file): out.writelines(["Need translate document:" + os.path.abspath(reference_file)]) elif not os.path.exists(working_file): - out.writelines(['Need link document ' + os.path.abspath(reference_file) + ' to ' + os.path.abspath(working_file)]) + out.writelines( + [ + "Need link document " + + os.path.abspath(reference_file) + + " to " + + os.path.abspath(working_file) + ] + ) elif get_hash(working_file) != get_hash(reference_file): - out.writelines([(execute(['git', 'diff', get_hash(working_file).strip('"'), reference_file]).encode('utf-8'))]) + out.writelines( + [ + ( + execute( + [ + "git", + "diff", + get_hash(working_file).strip('"'), + reference_file, + ] + ).encode("utf-8") + ) + ] + ) return 0 @@ -94,20 +116,30 @@ def diff_directory(reference_directory, working_directory, out): for list_item in os.listdir(reference_directory): working_item = os.path.join(working_directory, list_item) reference_item = os.path.join(reference_directory, list_item) - if diff_file(reference_item, working_item, out) if os.path.isfile(reference_item) else diff_directory(reference_item, working_item, out) != 0: + if ( + diff_file(reference_item, working_item, out) + if os.path.isfile(reference_item) + else diff_directory(reference_item, working_item, out) != 0 + ): return 1 return 0 -def find_language_doc(custom_document, other_language='en', children=[]): +def find_language_doc(custom_document, other_language="en", children=[]): if len(custom_document) == 0: - raise RuntimeError('The ' + os.path.join(custom_document, *children) + " is not in docs directory.") + raise RuntimeError( + "The " + + os.path.join(custom_document, *children) + + " is not in docs directory." + ) - if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, 'docs'), custom_document): - return os.path.join(CLICKHOUSE_REPO_HOME, 'docs', other_language, *children[1:]) + if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, "docs"), custom_document): + return os.path.join(CLICKHOUSE_REPO_HOME, "docs", other_language, *children[1:]) children.insert(0, os.path.split(custom_document)[1]) - return find_language_doc(os.path.split(custom_document)[0], other_language, children) + return find_language_doc( + os.path.split(custom_document)[0], other_language, children + ) class ToPager: @@ -119,7 +151,7 @@ class ToPager: def close(self): self.temp_named_file.flush() - git_pager = execute(['git', 'var', 'GIT_PAGER']) + git_pager = execute(["git", "var", "GIT_PAGER"]) subprocess.check_call([git_pager, self.temp_named_file.name]) self.temp_named_file.close() @@ -135,12 +167,20 @@ class ToStdOut: self.system_stdout_stream = system_stdout_stream -if __name__ == '__main__': +if __name__ == "__main__": arguments = SCRIPT_COMMAND_PARSER.parse_args() if arguments.help or not arguments.path: sys.stdout.write(SCRIPT_DESCRIPTION) sys.exit(0) - working_language = os.path.join(CLICKHOUSE_REPO_HOME, 'docs', arguments.path) - with contextlib.closing(ToStdOut(sys.stdout) if arguments.no_pager else ToPager(NamedTemporaryFile('r+'))) as writer: - exit(diff_directory(find_language_doc(working_language), working_language, writer)) + working_language = os.path.join(CLICKHOUSE_REPO_HOME, "docs", arguments.path) + with contextlib.closing( + ToStdOut(sys.stdout) + if arguments.no_pager + else ToPager(NamedTemporaryFile("r+")) + ) as writer: + exit( + diff_directory( + find_language_doc(working_language), working_language, writer + ) + ) diff --git a/docs/tools/github.py b/docs/tools/github.py index 465695d1512..3a6f155e25d 100644 --- 
a/docs/tools/github.py +++ b/docs/tools/github.py @@ -16,27 +16,26 @@ import util def get_events(args): events = [] skip = True - with open(os.path.join(args.docs_dir, '..', 'README.md')) as f: + with open(os.path.join(args.docs_dir, "..", "README.md")) as f: for line in f: if skip: - if 'Upcoming Events' in line: + if "Upcoming Events" in line: skip = False else: if not line: continue - line = line.strip().split('](') + line = line.strip().split("](") if len(line) == 2: - tail = line[1].split(') ') - events.append({ - 'signup_link': tail[0], - 'event_name': line[0].replace('* [', ''), - 'event_date': tail[1].replace('on ', '').replace('.', '') - }) + tail = line[1].split(") ") + events.append( + { + "signup_link": tail[0], + "event_name": line[0].replace("* [", ""), + "event_date": tail[1].replace("on ", "").replace(".", ""), + } + ) return events -if __name__ == '__main__': - logging.basicConfig( - level=logging.DEBUG, - stream=sys.stderr - ) +if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG, stream=sys.stderr) diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py index 18ecc890b6e..6b5a5bb5813 100755 --- a/docs/tools/mdx_clickhouse.py +++ b/docs/tools/mdx_clickhouse.py @@ -16,74 +16,79 @@ import slugify as slugify_impl def slugify(value, separator): - return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True) + return slugify_impl.slugify( + value, separator=separator, word_boundary=True, save_order=True + ) MARKDOWN_EXTENSIONS = [ - 'mdx_clickhouse', - 'admonition', - 'attr_list', - 'def_list', - 'codehilite', - 'nl2br', - 'sane_lists', - 'pymdownx.details', - 'pymdownx.magiclink', - 'pymdownx.superfences', - 'extra', - { - 'toc': { - 'permalink': True, - 'slugify': slugify - } - } + "mdx_clickhouse", + "admonition", + "attr_list", + "def_list", + "codehilite", + "nl2br", + "sane_lists", + "pymdownx.details", + "pymdownx.magiclink", + "pymdownx.superfences", + "extra", + {"toc": {"permalink": True, "slugify": slugify}}, ] class ClickHouseLinkMixin(object): - def handleMatch(self, m, data): - single_page = (os.environ.get('SINGLE_PAGE') == '1') + single_page = os.environ.get("SINGLE_PAGE") == "1" try: el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data) except IndexError: return if el is not None: - href = el.get('href') or '' - is_external = href.startswith('http:') or href.startswith('https:') + href = el.get("href") or "" + is_external = href.startswith("http:") or href.startswith("https:") if is_external: - if not href.startswith('https://clickhouse.com'): - el.set('rel', 'external nofollow noreferrer') + if not href.startswith("https://clickhouse.com"): + el.set("rel", "external nofollow noreferrer") elif single_page: - if '#' in href: - el.set('href', '#' + href.split('#', 1)[1]) + if "#" in href: + el.set("href", "#" + href.split("#", 1)[1]) else: - el.set('href', '#' + href.replace('/index.md', '/').replace('.md', '/')) + el.set( + "href", "#" + href.replace("/index.md", "/").replace(".md", "/") + ) return el, start, end -class ClickHouseAutolinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor): +class ClickHouseAutolinkPattern( + ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor +): pass -class ClickHouseLinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor): +class ClickHouseLinkPattern( + ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor +): pass class ClickHousePreprocessor(markdown.util.Processor): 
def run(self, lines): for line in lines: - if '' not in line: + if "" not in line: yield line class ClickHouseMarkdown(markdown.extensions.Extension): - def extendMarkdown(self, md, md_globals): - md.preprocessors['clickhouse'] = ClickHousePreprocessor() - md.inlinePatterns['link'] = ClickHouseLinkPattern(markdown.inlinepatterns.LINK_RE, md) - md.inlinePatterns['autolink'] = ClickHouseAutolinkPattern(markdown.inlinepatterns.AUTOLINK_RE, md) + md.preprocessors["clickhouse"] = ClickHousePreprocessor() + md.inlinePatterns["link"] = ClickHouseLinkPattern( + markdown.inlinepatterns.LINK_RE, md + ) + md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern( + markdown.inlinepatterns.AUTOLINK_RE, md + ) def makeExtension(**kwargs): @@ -92,10 +97,8 @@ def makeExtension(**kwargs): def get_translations(dirname, lang): import babel.support - return babel.support.Translations.load( - dirname=dirname, - locales=[lang, 'en'] - ) + + return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"]) class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): @@ -104,22 +107,22 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): def on_config(self, config): super(PatchedMacrosPlugin, self).on_config(config) - self.env.comment_start_string = '{##' - self.env.comment_end_string = '##}' - self.env.loader = jinja2.FileSystemLoader([ - os.path.join(config.data['site_dir']), - os.path.join(config.data['extra']['includes_dir']) - ]) + self.env.comment_start_string = "{##" + self.env.comment_end_string = "##}" + self.env.loader = jinja2.FileSystemLoader( + [ + os.path.join(config.data["site_dir"]), + os.path.join(config.data["extra"]["includes_dir"]), + ] + ) def on_env(self, env, config, files): import util - env.add_extension('jinja2.ext.i18n') - dirname = os.path.join(config.data['theme'].dirs[0], 'locale') - lang = config.data['theme']['language'] - env.install_gettext_translations( - get_translations(dirname, lang), - newstyle=True - ) + + env.add_extension("jinja2.ext.i18n") + dirname = os.path.join(config.data["theme"].dirs[0], "locale") + lang = config.data["theme"]["language"] + env.install_gettext_translations(get_translations(dirname, lang), newstyle=True) util.init_jinja2_filters(env) return env @@ -130,13 +133,17 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): return markdown def on_page_markdown(self, markdown, page, config, files): - markdown = super(PatchedMacrosPlugin, self).on_page_markdown(markdown, page, config, files) + markdown = super(PatchedMacrosPlugin, self).on_page_markdown( + markdown, page, config, files + ) if os.path.islink(page.file.abs_src_path): - lang = config.data['theme']['language'] - page.canonical_url = page.canonical_url.replace(f'/{lang}/', '/en/', 1) + lang = config.data["theme"]["language"] + page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1) - if config.data['extra'].get('version_prefix') or config.data['extra'].get('single_page'): + if config.data["extra"].get("version_prefix") or config.data["extra"].get( + "single_page" + ): return markdown if self.skip_git_log: return markdown diff --git a/docs/tools/nav.py b/docs/tools/nav.py index db64d1ba404..e3df85bbe4e 100644 --- a/docs/tools/nav.py +++ b/docs/tools/nav.py @@ -10,57 +10,59 @@ import util def find_first_header(content): - for line in content.split('\n'): - if line.startswith('#'): - no_hash = line.lstrip('#') - return no_hash.split('{', 1)[0].strip() + for line in content.split("\n"): + if line.startswith("#"): + no_hash = line.lstrip("#") + return no_hash.split("{", 
1)[0].strip() def build_nav_entry(root, args): - if root.endswith('images'): + if root.endswith("images"): return None, None, None result_items = [] - index_meta, index_content = util.read_md_file(os.path.join(root, 'index.md')) - current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title')) - current_title = current_title or index_meta.get('title', find_first_header(index_content)) + index_meta, index_content = util.read_md_file(os.path.join(root, "index.md")) + current_title = index_meta.get("toc_folder_title", index_meta.get("toc_title")) + current_title = current_title or index_meta.get( + "title", find_first_header(index_content) + ) for filename in os.listdir(root): path = os.path.join(root, filename) if os.path.isdir(path): prio, title, payload = build_nav_entry(path, args) if title and payload: result_items.append((prio, title, payload)) - elif filename.endswith('.md'): + elif filename.endswith(".md"): path = os.path.join(root, filename) - meta = '' - content = '' + meta = "" + content = "" try: meta, content = util.read_md_file(path) except: - print('Error in file: {}'.format(path)) + print("Error in file: {}".format(path)) raise - path = path.split('/', 2)[-1] - title = meta.get('toc_title', find_first_header(content)) + path = path.split("/", 2)[-1] + title = meta.get("toc_title", find_first_header(content)) if title: - title = title.strip().rstrip('.') + title = title.strip().rstrip(".") else: - title = meta.get('toc_folder_title', 'hidden') - prio = meta.get('toc_priority', 9999) - logging.debug(f'Nav entry: {prio}, {title}, {path}') - if meta.get('toc_hidden') or not content.strip(): - title = 'hidden' - if title == 'hidden': - title = 'hidden-' + hashlib.sha1(content.encode('utf-8')).hexdigest() + title = meta.get("toc_folder_title", "hidden") + prio = meta.get("toc_priority", 9999) + logging.debug(f"Nav entry: {prio}, {title}, {path}") + if meta.get("toc_hidden") or not content.strip(): + title = "hidden" + if title == "hidden": + title = "hidden-" + hashlib.sha1(content.encode("utf-8")).hexdigest() if args.nav_limit and len(result_items) >= args.nav_limit: break result_items.append((prio, title, path)) result_items = sorted(result_items, key=lambda x: (x[0], x[1])) result = collections.OrderedDict([(item[1], item[2]) for item in result_items]) - if index_meta.get('toc_hidden_folder'): - current_title += '|hidden-folder' - return index_meta.get('toc_priority', 10000), current_title, result + if index_meta.get("toc_hidden_folder"): + current_title += "|hidden-folder" + return index_meta.get("toc_priority", 10000), current_title, result def build_docs_nav(lang, args): @@ -70,7 +72,7 @@ def build_docs_nav(lang, args): index_key = None for key, value in list(nav.items()): if key and value: - if value == 'index.md': + if value == "index.md": index_key = key continue result.append({key: value}) @@ -78,7 +80,7 @@ def build_docs_nav(lang, args): break if index_key: key = list(result[0].keys())[0] - result[0][key][index_key] = 'index.md' + result[0][key][index_key] = "index.md" result[0][key].move_to_end(index_key, last=False) return result @@ -86,7 +88,7 @@ def build_docs_nav(lang, args): def build_blog_nav(lang, args): blog_dir = os.path.join(args.blog_dir, lang) years = sorted(os.listdir(blog_dir), reverse=True) - result_nav = [{'hidden': 'index.md'}] + result_nav = [{"hidden": "index.md"}] post_meta = collections.OrderedDict() for year in years: year_dir = os.path.join(blog_dir, year) @@ -97,38 +99,53 @@ def build_blog_nav(lang, args): post_meta_items = [] for 
post in os.listdir(year_dir): post_path = os.path.join(year_dir, post) - if not post.endswith('.md'): - raise RuntimeError(f'Unexpected non-md file in posts folder: {post_path}') + if not post.endswith(".md"): + raise RuntimeError( + f"Unexpected non-md file in posts folder: {post_path}" + ) meta, _ = util.read_md_file(post_path) - post_date = meta['date'] - post_title = meta['title'] + post_date = meta["date"] + post_title = meta["title"] if datetime.date.fromisoformat(post_date) > datetime.date.today(): continue posts.append( - (post_date, post_title, os.path.join(year, post),) + ( + post_date, + post_title, + os.path.join(year, post), + ) ) if post_title in post_meta: - raise RuntimeError(f'Duplicate post title: {post_title}') - if not post_date.startswith(f'{year}-'): - raise RuntimeError(f'Post date {post_date} doesn\'t match the folder year {year}: {post_title}') - post_url_part = post.replace('.md', '') - post_meta_items.append((post_date, { - 'date': post_date, - 'title': post_title, - 'image': meta.get('image'), - 'url': f'/blog/{lang}/{year}/{post_url_part}/' - },)) + raise RuntimeError(f"Duplicate post title: {post_title}") + if not post_date.startswith(f"{year}-"): + raise RuntimeError( + f"Post date {post_date} doesn't match the folder year {year}: {post_title}" + ) + post_url_part = post.replace(".md", "") + post_meta_items.append( + ( + post_date, + { + "date": post_date, + "title": post_title, + "image": meta.get("image"), + "url": f"/blog/{lang}/{year}/{post_url_part}/", + }, + ) + ) for _, title, path in sorted(posts, reverse=True): result_nav[-1][year][title] = path - for _, post_meta_item in sorted(post_meta_items, - reverse=True, - key=lambda item: item[0]): - post_meta[post_meta_item['title']] = post_meta_item + for _, post_meta_item in sorted( + post_meta_items, reverse=True, key=lambda item: item[0] + ): + post_meta[post_meta_item["title"]] = post_meta_item return result_nav, post_meta def _custom_get_navigation(files, config): - nav_config = config['nav'] or mkdocs.structure.nav.nest_paths(f.src_path for f in files.documentation_pages()) + nav_config = config["nav"] or mkdocs.structure.nav.nest_paths( + f.src_path for f in files.documentation_pages() + ) items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config) if not isinstance(items, list): items = [items] @@ -138,19 +155,25 @@ def _custom_get_navigation(files, config): mkdocs.structure.nav._add_previous_and_next_links(pages) mkdocs.structure.nav._add_parent_links(items) - missing_from_config = [file for file in files.documentation_pages() if file.page is None] + missing_from_config = [ + file for file in files.documentation_pages() if file.page is None + ] if missing_from_config: - files._files = [file for file in files._files if file not in missing_from_config] + files._files = [ + file for file in files._files if file not in missing_from_config + ] links = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Link) for link in links: - scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(link.url) + scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse( + link.url + ) if scheme or netloc: mkdocs.structure.nav.log.debug( "An external link to '{}' is included in " "the 'nav' configuration.".format(link.url) ) - elif link.url.startswith('/'): + elif link.url.startswith("/"): mkdocs.structure.nav.log.debug( "An absolute path to '{}' is included in the 'nav' configuration, " "which presumably points to an external resource.".format(link.url) 
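
The `redirects.py` hunk that follows is also a pure reflow, so the path mapping stays as it was: each `from_path to_path` pair from a `redirects.txt` becomes an output stub whose target URL is prefixed with the docs prefix and language unless it is already absolute. A minimal sketch of that mapping, with a made-up redirect entry and the `docs`/`en` prefixes as assumed defaults:

```python
# Sketch of the path mapping performed by build_redirect_html() below; the
# returned out_path is relative to the per-language output directory.
def redirect_paths(from_path, to_path, base_prefix="docs", lang="en"):
    out_path = from_path.replace("/index.md", "/index.html").replace(".md", "/index.html")
    target = to_path.replace("/index.md", "/").replace(".md", "/")
    if not (target.startswith("http://") or target.startswith("https://")):
        target = f"/{base_prefix}/{lang}/{target}"
    return out_path, target


# Hypothetical redirects.txt entry, for illustration only.
print(redirect_paths("operations/table_engines/mergetree.md",
                     "engines/table-engines/mergetree-family/mergetree.md"))
# ('operations/table_engines/mergetree/index.html',
#  '/docs/en/engines/table-engines/mergetree-family/mergetree/')
```

`write_redirect_html` then writes a small page at that output path which sends the reader on to the returned URL.
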
diff --git a/docs/tools/redirects.py b/docs/tools/redirects.py index 1f0a3bb4b74..5d222376683 100644 --- a/docs/tools/redirects.py +++ b/docs/tools/redirects.py @@ -7,8 +7,9 @@ def write_redirect_html(out_path, to_url): os.makedirs(out_dir) except OSError: pass - with open(out_path, 'w') as f: - f.write(f''' + with open(out_path, "w") as f: + f.write( + f""" @@ -22,18 +23,20 @@ def write_redirect_html(out_path, to_url): If you are not redirected automatically, follow this link. -''') +""" + ) def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path): out_path = os.path.join( - output_dir, lang, - from_path.replace('/index.md', '/index.html').replace('.md', '/index.html') + output_dir, + lang, + from_path.replace("/index.md", "/index.html").replace(".md", "/index.html"), ) - target_path = to_path.replace('/index.md', '/').replace('.md', '/') + target_path = to_path.replace("/index.md", "/").replace(".md", "/") - if target_path[0:7] != 'http://' and target_path[0:8] != 'https://': - to_url = f'/{base_prefix}/{lang}/{target_path}' + if target_path[0:7] != "http://" and target_path[0:8] != "https://": + to_url = f"/{base_prefix}/{lang}/{target_path}" else: to_url = target_path @@ -42,33 +45,48 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path) def build_docs_redirects(args): - with open(os.path.join(args.docs_dir, 'redirects.txt'), 'r') as f: + with open(os.path.join(args.docs_dir, "redirects.txt"), "r") as f: for line in f: - for lang in args.lang.split(','): - from_path, to_path = line.split(' ', 1) - build_redirect_html(args, 'docs', lang, args.docs_output_dir, from_path, to_path) + for lang in args.lang.split(","): + from_path, to_path = line.split(" ", 1) + build_redirect_html( + args, "docs", lang, args.docs_output_dir, from_path, to_path + ) def build_blog_redirects(args): - for lang in args.blog_lang.split(','): - redirects_path = os.path.join(args.blog_dir, lang, 'redirects.txt') + for lang in args.blog_lang.split(","): + redirects_path = os.path.join(args.blog_dir, lang, "redirects.txt") if os.path.exists(redirects_path): - with open(redirects_path, 'r') as f: + with open(redirects_path, "r") as f: for line in f: - from_path, to_path = line.split(' ', 1) - build_redirect_html(args, 'blog', lang, args.blog_output_dir, from_path, to_path) + from_path, to_path = line.split(" ", 1) + build_redirect_html( + args, "blog", lang, args.blog_output_dir, from_path, to_path + ) def build_static_redirects(args): for static_redirect in [ - ('benchmark.html', '/benchmark/dbms/'), - ('benchmark_hardware.html', '/benchmark/hardware/'), - ('tutorial.html', '/docs/en/getting_started/tutorial/',), - ('reference_en.html', '/docs/en/single/', ), - ('reference_ru.html', '/docs/ru/single/',), - ('docs/index.html', '/docs/en/',), + ("benchmark.html", "/benchmark/dbms/"), + ("benchmark_hardware.html", "/benchmark/hardware/"), + ( + "tutorial.html", + "/docs/en/getting_started/tutorial/", + ), + ( + "reference_en.html", + "/docs/en/single/", + ), + ( + "reference_ru.html", + "/docs/ru/single/", + ), + ( + "docs/index.html", + "/docs/en/", + ), ]: write_redirect_html( - os.path.join(args.output_dir, static_redirect[0]), - static_redirect[1] + os.path.join(args.output_dir, static_redirect[0]), static_redirect[1] ) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 4e0789b5d24..dd641c13629 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -1,4 +1,4 @@ -Babel==2.8.0 +Babel==2.9.1 backports-abc==0.5 
backports.functools-lru-cache==1.6.1 beautifulsoup4==4.9.1 @@ -10,22 +10,22 @@ cssmin==0.2.0 future==0.18.2 htmlmin==0.1.12 idna==2.10 -Jinja2>=2.11.3 +Jinja2==3.0.3 jinja2-highlight==0.6.1 jsmin==3.0.0 -livereload==2.6.2 +livereload==2.6.3 Markdown==3.3.2 -MarkupSafe==1.1.1 -mkdocs==1.1.2 +MarkupSafe==2.1.0 +mkdocs==1.3.0 mkdocs-htmlproofer-plugin==0.0.3 mkdocs-macros-plugin==0.4.20 -nltk==3.5 +nltk==3.7 nose==1.3.7 protobuf==3.14.0 numpy==1.21.2 pymdown-extensions==8.0 python-slugify==4.0.1 -PyYAML==5.4.1 +PyYAML==6.0 repackage==0.7.3 requests==2.25.1 singledispatch==3.4.0.3 @@ -34,5 +34,6 @@ soupsieve==2.0.1 termcolor==1.1.0 tornado==6.1 Unidecode==1.1.1 -urllib3>=1.26.5 -Pygments>=2.7.4 +urllib3>=1.26.8 +Pygments>=2.11.2 + diff --git a/docs/tools/single_page.py b/docs/tools/single_page.py index 3d32ba30a21..ed285fce9f8 100644 --- a/docs/tools/single_page.py +++ b/docs/tools/single_page.py @@ -12,7 +12,8 @@ import test import util import website -TEMPORARY_FILE_NAME = 'single.md' +TEMPORARY_FILE_NAME = "single.md" + def recursive_values(item): if isinstance(item, dict): @@ -25,11 +26,14 @@ def recursive_values(item): yield item -anchor_not_allowed_chars = re.compile(r'[^\w\-]') -def generate_anchor_from_path(path): - return re.sub(anchor_not_allowed_chars, '-', path) +anchor_not_allowed_chars = re.compile(r"[^\w\-]") -absolute_link = re.compile(r'^https?://') + +def generate_anchor_from_path(path): + return re.sub(anchor_not_allowed_chars, "-", path) + + +absolute_link = re.compile(r"^https?://") def replace_link(match, path): @@ -40,46 +44,55 @@ def replace_link(match, path): if re.search(absolute_link, link): return match.group(0) - if link.endswith('/'): - link = link[0:-1] + '.md' + if link.endswith("/"): + link = link[0:-1] + ".md" - return '{}(#{})'.format(title, generate_anchor_from_path(os.path.normpath(os.path.join(os.path.dirname(path), link)))) + return "{}(#{})".format( + title, + generate_anchor_from_path( + os.path.normpath(os.path.join(os.path.dirname(path), link)) + ), + ) # Concatenates Markdown files to a single file. def concatenate(lang, docs_path, single_page_file, nav): lang_path = os.path.join(docs_path, lang) - proj_config = f'{docs_path}/toc_{lang}.yml' + proj_config = f"{docs_path}/toc_{lang}.yml" if os.path.exists(proj_config): with open(proj_config) as cfg_file: - nav = yaml.full_load(cfg_file.read())['nav'] + nav = yaml.full_load(cfg_file.read())["nav"] files_to_concatenate = list(recursive_values(nav)) files_count = len(files_to_concatenate) - logging.info(f'{files_count} files will be concatenated into single md-file for {lang}.') - logging.debug('Concatenating: ' + ', '.join(files_to_concatenate)) - assert files_count > 0, f'Empty single-page for {lang}' + logging.info( + f"{files_count} files will be concatenated into single md-file for {lang}." + ) + logging.debug("Concatenating: " + ", ".join(files_to_concatenate)) + assert files_count > 0, f"Empty single-page for {lang}" - link_regexp = re.compile(r'(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)') + link_regexp = re.compile(r"(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)") for path in files_to_concatenate: try: with open(os.path.join(lang_path, path)) as f: # Insert a horizontal ruler. Then insert an anchor that we will link to. Its name will be a path to the .md file. - single_page_file.write('\n______\n\n' % generate_anchor_from_path(path)) + single_page_file.write( + '\n______\n\n' % generate_anchor_from_path(path) + ) in_metadata = False for line in f: # Skip YAML metadata. 
- if line == '---\n': + if line == "---\n": in_metadata = not in_metadata continue if not in_metadata: # Increase the level of headers. - if line.startswith('#'): - line = '#' + line + if line.startswith("#"): + line = "#" + line # Replace links within the docs. @@ -87,14 +100,19 @@ def concatenate(lang, docs_path, single_page_file, nav): line = re.sub( link_regexp, lambda match: replace_link(match, path), - line) + line, + ) # If failed to replace the relative link, print to log # But with some exceptions: # - "../src/" -- for cmake-in-clickhouse.md (link to sources) # - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo" - if '../' in line and (not '../usr/share' in line) and (not '../src/' in line): - logging.info('Failed to resolve relative link:') + if ( + "../" in line + and (not "../usr/share" in line) + and (not "../src/" in line) + ): + logging.info("Failed to resolve relative link:") logging.info(path) logging.info(line) @@ -105,9 +123,11 @@ def concatenate(lang, docs_path, single_page_file, nav): single_page_file.flush() + def get_temporary_file_name(lang, args): return os.path.join(args.docs_dir, lang, TEMPORARY_FILE_NAME) + def remove_temporary_files(lang, args): single_md_path = get_temporary_file_name(lang, args) if os.path.exists(single_md_path): @@ -115,14 +135,14 @@ def remove_temporary_files(lang, args): def build_single_page_version(lang, args, nav, cfg): - logging.info(f'Building single page version for {lang}') - os.environ['SINGLE_PAGE'] = '1' - extra = cfg.data['extra'] - extra['single_page'] = True - extra['is_amp'] = False + logging.info(f"Building single page version for {lang}") + os.environ["SINGLE_PAGE"] = "1" + extra = cfg.data["extra"] + extra["single_page"] = True + extra["is_amp"] = False single_md_path = get_temporary_file_name(lang, args) - with open(single_md_path, 'w') as single_md: + with open(single_md_path, "w") as single_md: concatenate(lang, args.docs_dir, single_md, nav) with util.temp_dir() as site_temp: @@ -132,72 +152,83 @@ def build_single_page_version(lang, args, nav, cfg): shutil.copytree(docs_src_lang, docs_temp_lang) for root, _, filenames in os.walk(docs_temp_lang): for filename in filenames: - if filename != 'single.md' and filename.endswith('.md'): + if filename != "single.md" and filename.endswith(".md"): os.unlink(os.path.join(root, filename)) - cfg.load_dict({ - 'docs_dir': docs_temp_lang, - 'site_dir': site_temp, - 'extra': extra, - 'nav': [ - {cfg.data.get('site_name'): 'single.md'} - ] - }) + cfg.load_dict( + { + "docs_dir": docs_temp_lang, + "site_dir": site_temp, + "extra": extra, + "nav": [{cfg.data.get("site_name"): "single.md"}], + } + ) if not args.test_only: mkdocs.commands.build.build(cfg) - single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single') + single_page_output_path = os.path.join( + args.docs_dir, args.docs_output_dir, lang, "single" + ) if os.path.exists(single_page_output_path): shutil.rmtree(single_page_output_path) shutil.copytree( - os.path.join(site_temp, 'single'), - single_page_output_path + os.path.join(site_temp, "single"), single_page_output_path ) - single_page_index_html = os.path.join(single_page_output_path, 'index.html') - single_page_content_js = os.path.join(single_page_output_path, 'content.js') + single_page_index_html = os.path.join( + single_page_output_path, "index.html" + ) + single_page_content_js = os.path.join( + single_page_output_path, "content.js" + ) - with open(single_page_index_html, 'r') as f: - sp_prefix, sp_js, sp_suffix = 
f.read().split('') + with open(single_page_index_html, "r") as f: + sp_prefix, sp_js, sp_suffix = f.read().split("") - with open(single_page_index_html, 'w') as f: + with open(single_page_index_html, "w") as f: f.write(sp_prefix) f.write(sp_suffix) - with open(single_page_content_js, 'w') as f: + with open(single_page_content_js, "w") as f: if args.minify: import jsmin + sp_js = jsmin.jsmin(sp_js) f.write(sp_js) - logging.info(f'Re-building single page for {lang} pdf/test') + logging.info(f"Re-building single page for {lang} pdf/test") with util.temp_dir() as test_dir: - extra['single_page'] = False - cfg.load_dict({ - 'docs_dir': docs_temp_lang, - 'site_dir': test_dir, - 'extra': extra, - 'nav': [ - {cfg.data.get('site_name'): 'single.md'} - ] - }) + extra["single_page"] = False + cfg.load_dict( + { + "docs_dir": docs_temp_lang, + "site_dir": test_dir, + "extra": extra, + "nav": [{cfg.data.get("site_name"): "single.md"}], + } + ) mkdocs.commands.build.build(cfg) - css_in = ' '.join(website.get_css_in(args)) - js_in = ' '.join(website.get_js_in(args)) - subprocess.check_call(f'cat {css_in} > {test_dir}/css/base.css', shell=True) - subprocess.check_call(f'cat {js_in} > {test_dir}/js/base.js', shell=True) + css_in = " ".join(website.get_css_in(args)) + js_in = " ".join(website.get_js_in(args)) + subprocess.check_call( + f"cat {css_in} > {test_dir}/css/base.css", shell=True + ) + subprocess.check_call( + f"cat {js_in} > {test_dir}/js/base.js", shell=True + ) if args.save_raw_single_page: shutil.copytree(test_dir, args.save_raw_single_page) - logging.info(f'Running tests for {lang}') + logging.info(f"Running tests for {lang}") test.test_single_page( - os.path.join(test_dir, 'single', 'index.html'), lang) + os.path.join(test_dir, "single", "index.html"), lang + ) - logging.info(f'Finished building single page version for {lang}') + logging.info(f"Finished building single page version for {lang}") remove_temporary_files(lang, args) diff --git a/docs/tools/test.py b/docs/tools/test.py index 1ea07c45192..d0469d042ee 100755 --- a/docs/tools/test.py +++ b/docs/tools/test.py @@ -8,14 +8,11 @@ import subprocess def test_single_page(input_path, lang): - if not (lang == 'en'): + if not (lang == "en"): return with open(input_path) as f: - soup = bs4.BeautifulSoup( - f, - features='html.parser' - ) + soup = bs4.BeautifulSoup(f, features="html.parser") anchor_points = set() @@ -23,30 +20,27 @@ def test_single_page(input_path, lang): links_to_nowhere = 0 for tag in soup.find_all(): - for anchor_point in [tag.attrs.get('name'), tag.attrs.get('id')]: + for anchor_point in [tag.attrs.get("name"), tag.attrs.get("id")]: if anchor_point: anchor_points.add(anchor_point) for tag in soup.find_all(): - href = tag.attrs.get('href') - if href and href.startswith('#') and href != '#': + href = tag.attrs.get("href") + if href and href.startswith("#") and href != "#": if href[1:] not in anchor_points: links_to_nowhere += 1 logging.info("Tag %s", tag) - logging.info('Link to nowhere: %s' % href) + logging.info("Link to nowhere: %s" % href) if links_to_nowhere: - logging.error(f'Found {links_to_nowhere} links to nowhere in {lang}') + logging.error(f"Found {links_to_nowhere} links to nowhere in {lang}") sys.exit(1) if len(anchor_points) <= 10: - logging.error('Html parsing is probably broken') + logging.error("Html parsing is probably broken") sys.exit(1) -if __name__ == '__main__': - logging.basicConfig( - level=logging.DEBUG, - stream=sys.stderr - ) +if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG, 
stream=sys.stderr) test_single_page(sys.argv[1], sys.argv[2]) diff --git a/docs/tools/util.py b/docs/tools/util.py index 25961561f99..fb2f135c85e 100644 --- a/docs/tools/util.py +++ b/docs/tools/util.py @@ -15,7 +15,7 @@ import yaml @contextlib.contextmanager def temp_dir(): - path = tempfile.mkdtemp(dir=os.environ.get('TEMP')) + path = tempfile.mkdtemp(dir=os.environ.get("TEMP")) try: yield path finally: @@ -34,7 +34,7 @@ def cd(new_cwd): def get_free_port(): with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: - s.bind(('', 0)) + s.bind(("", 0)) s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) return s.getsockname()[1] @@ -61,12 +61,12 @@ def read_md_file(path): meta_text = [] content = [] if os.path.exists(path): - with open(path, 'r') as f: + with open(path, "r") as f: for line in f: - if line.startswith('---'): + if line.startswith("---"): if in_meta: in_meta = False - meta = yaml.full_load(''.join(meta_text)) + meta = yaml.full_load("".join(meta_text)) else: in_meta = True else: @@ -74,7 +74,7 @@ def read_md_file(path): meta_text.append(line) else: content.append(line) - return meta, ''.join(content) + return meta, "".join(content) def write_md_file(path, meta, content): @@ -82,13 +82,13 @@ def write_md_file(path, meta, content): if not os.path.exists(dirname): os.makedirs(dirname) - with open(path, 'w') as f: + with open(path, "w") as f: if meta: - print('---', file=f) + print("---", file=f) yaml.dump(meta, f) - print('---', file=f) - if not content.startswith('\n'): - print('', file=f) + print("---", file=f) + if not content.startswith("\n"): + print("", file=f) f.write(content) @@ -100,7 +100,7 @@ def represent_ordereddict(dumper, data): value.append((node_key, node_value)) - return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value) + return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value) yaml.add_representer(collections.OrderedDict, represent_ordereddict) @@ -109,30 +109,31 @@ yaml.add_representer(collections.OrderedDict, represent_ordereddict) def init_jinja2_filters(env): import amp import website + chunk_size = 10240 - env.filters['chunks'] = lambda line: [line[i:i + chunk_size] for i in range(0, len(line), chunk_size)] - env.filters['html_to_amp'] = amp.html_to_amp - env.filters['adjust_markdown_html'] = website.adjust_markdown_html - env.filters['to_rfc882'] = lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').strftime('%a, %d %b %Y %H:%M:%S GMT') + env.filters["chunks"] = lambda line: [ + line[i : i + chunk_size] for i in range(0, len(line), chunk_size) + ] + env.filters["html_to_amp"] = amp.html_to_amp + env.filters["adjust_markdown_html"] = website.adjust_markdown_html + env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime( + d, "%Y-%m-%d" + ).strftime("%a, %d %b %Y %H:%M:%S GMT") def init_jinja2_env(args): import mdx_clickhouse + env = jinja2.Environment( - loader=jinja2.FileSystemLoader([ - args.website_dir, - os.path.join(args.docs_dir, '_includes') - ]), - extensions=[ - 'jinja2.ext.i18n', - 'jinja2_highlight.HighlightExtension' - ] + loader=jinja2.FileSystemLoader( + [args.website_dir, os.path.join(args.docs_dir, "_includes")] + ), + extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"], ) - env.extend(jinja2_highlight_cssclass='syntax p-3 my-3') - translations_dir = os.path.join(args.website_dir, 'locale') + env.extend(jinja2_highlight_cssclass="syntax p-3 my-3") + translations_dir = os.path.join(args.website_dir, "locale") env.install_gettext_translations( - 
mdx_clickhouse.get_translations(translations_dir, 'en'), - newstyle=True + mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True ) init_jinja2_filters(env) return env diff --git a/docs/tools/website.py b/docs/tools/website.py index de4cc14670c..2c748d96414 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -17,108 +17,112 @@ import util def handle_iframe(iframe, soup): - allowed_domains = ['https://www.youtube.com/', 'https://datalens.yandex/'] + allowed_domains = ["https://www.youtube.com/", "https://datalens.yandex/"] illegal_domain = True - iframe_src = iframe.attrs['src'] + iframe_src = iframe.attrs["src"] for domain in allowed_domains: if iframe_src.startswith(domain): illegal_domain = False break if illegal_domain: - raise RuntimeError(f'iframe from illegal domain: {iframe_src}') - wrapper = soup.new_tag('div') - wrapper.attrs['class'] = ['embed-responsive', 'embed-responsive-16by9'] + raise RuntimeError(f"iframe from illegal domain: {iframe_src}") + wrapper = soup.new_tag("div") + wrapper.attrs["class"] = ["embed-responsive", "embed-responsive-16by9"] iframe.insert_before(wrapper) iframe.extract() wrapper.insert(0, iframe) - if 'width' in iframe.attrs: - del iframe.attrs['width'] - if 'height' in iframe.attrs: - del iframe.attrs['height'] - iframe.attrs['allow'] = 'accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture' - iframe.attrs['class'] = 'embed-responsive-item' - iframe.attrs['frameborder'] = '0' - iframe.attrs['allowfullscreen'] = '1' + if "width" in iframe.attrs: + del iframe.attrs["width"] + if "height" in iframe.attrs: + del iframe.attrs["height"] + iframe.attrs[ + "allow" + ] = "accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" + iframe.attrs["class"] = "embed-responsive-item" + iframe.attrs["frameborder"] = "0" + iframe.attrs["allowfullscreen"] = "1" def adjust_markdown_html(content): - soup = bs4.BeautifulSoup( - content, - features='html.parser' - ) + soup = bs4.BeautifulSoup(content, features="html.parser") - for a in soup.find_all('a'): - a_class = a.attrs.get('class') - a_href = a.attrs.get('href') - if a_class and 'headerlink' in a_class: - a.string = '\xa0' - if a_href and a_href.startswith('http'): - a.attrs['target'] = '_blank' + for a in soup.find_all("a"): + a_class = a.attrs.get("class") + a_href = a.attrs.get("href") + if a_class and "headerlink" in a_class: + a.string = "\xa0" + if a_href and a_href.startswith("http"): + a.attrs["target"] = "_blank" - for code in soup.find_all('code'): - code_class = code.attrs.get('class') + for code in soup.find_all("code"): + code_class = code.attrs.get("class") if code_class: - code.attrs['class'] = code_class + ['syntax'] + code.attrs["class"] = code_class + ["syntax"] else: - code.attrs['class'] = 'syntax' + code.attrs["class"] = "syntax" - for iframe in soup.find_all('iframe'): + for iframe in soup.find_all("iframe"): handle_iframe(iframe, soup) - for img in soup.find_all('img'): - if img.attrs.get('alt') == 'iframe': - img.name = 'iframe' - img.string = '' + for img in soup.find_all("img"): + if img.attrs.get("alt") == "iframe": + img.name = "iframe" + img.string = "" handle_iframe(img, soup) continue - img_class = img.attrs.get('class') + img_class = img.attrs.get("class") if img_class: - img.attrs['class'] = img_class + ['img-fluid'] + img.attrs["class"] = img_class + ["img-fluid"] else: - img.attrs['class'] = 'img-fluid' + img.attrs["class"] = "img-fluid" - for details in soup.find_all('details'): - for summary in 
details.find_all('summary'): + for details in soup.find_all("details"): + for summary in details.find_all("summary"): if summary.parent != details: summary.extract() details.insert(0, summary) - for dd in soup.find_all('dd'): - dd_class = dd.attrs.get('class') + for dd in soup.find_all("dd"): + dd_class = dd.attrs.get("class") if dd_class: - dd.attrs['class'] = dd_class + ['pl-3'] + dd.attrs["class"] = dd_class + ["pl-3"] else: - dd.attrs['class'] = 'pl-3' + dd.attrs["class"] = "pl-3" - for div in soup.find_all('div'): - div_class = div.attrs.get('class') - is_admonition = div_class and 'admonition' in div.attrs.get('class') + for div in soup.find_all("div"): + div_class = div.attrs.get("class") + is_admonition = div_class and "admonition" in div.attrs.get("class") if is_admonition: - for a in div.find_all('a'): - a_class = a.attrs.get('class') + for a in div.find_all("a"): + a_class = a.attrs.get("class") if a_class: - a.attrs['class'] = a_class + ['alert-link'] + a.attrs["class"] = a_class + ["alert-link"] else: - a.attrs['class'] = 'alert-link' + a.attrs["class"] = "alert-link" - for p in div.find_all('p'): - p_class = p.attrs.get('class') - if is_admonition and p_class and ('admonition-title' in p_class): - p.attrs['class'] = p_class + ['alert-heading', 'display-4', 'text-reset', 'mb-2'] + for p in div.find_all("p"): + p_class = p.attrs.get("class") + if is_admonition and p_class and ("admonition-title" in p_class): + p.attrs["class"] = p_class + [ + "alert-heading", + "display-4", + "text-reset", + "mb-2", + ] if is_admonition: - div.attrs['role'] = 'alert' - if ('info' in div_class) or ('note' in div_class): - mode = 'alert-primary' - elif ('attention' in div_class) or ('warning' in div_class): - mode = 'alert-warning' - elif 'important' in div_class: - mode = 'alert-danger' - elif 'tip' in div_class: - mode = 'alert-info' + div.attrs["role"] = "alert" + if ("info" in div_class) or ("note" in div_class): + mode = "alert-primary" + elif ("attention" in div_class) or ("warning" in div_class): + mode = "alert-warning" + elif "important" in div_class: + mode = "alert-danger" + elif "tip" in div_class: + mode = "alert-info" else: - mode = 'alert-secondary' - div.attrs['class'] = div_class + ['alert', 'pb-0', 'mb-4', mode] + mode = "alert-secondary" + div.attrs["class"] = div_class + ["alert", "pb-0", "mb-4", mode] return str(soup) @@ -128,61 +132,63 @@ def minify_html(content): def build_website(args): - logging.info('Building website') + logging.info("Building website") env = util.init_jinja2_env(args) shutil.copytree( args.website_dir, args.output_dir, ignore=shutil.ignore_patterns( - '*.md', - '*.sh', - '*.css', - '*.json', - 'js/*.js', - 'build', - 'docs', - 'public', - 'node_modules', - 'src', - 'templates', - 'locale', - '.gitkeep' - ) + "*.md", + "*.sh", + "*.css", + "*.json", + "js/*.js", + "build", + "docs", + "public", + "node_modules", + "src", + "templates", + "locale", + ".gitkeep", + ), ) shutil.copytree( - os.path.join(args.website_dir, 'images'), - os.path.join(args.output_dir, 'docs', 'images') + os.path.join(args.website_dir, "images"), + os.path.join(args.output_dir, "docs", "images"), ) # This file can be requested to check for available ClickHouse releases. 
shutil.copy2( - os.path.join(args.src_dir, 'utils', 'list-versions', 'version_date.tsv'), - os.path.join(args.output_dir, 'data', 'version_date.tsv')) + os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"), + os.path.join(args.output_dir, "data", "version_date.tsv"), + ) # This file can be requested to install ClickHouse. shutil.copy2( - os.path.join(args.src_dir, 'docs', '_includes', 'install', 'universal.sh'), - os.path.join(args.output_dir, 'data', 'install.sh')) + os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"), + os.path.join(args.output_dir, "data", "install.sh"), + ) for root, _, filenames in os.walk(args.output_dir): for filename in filenames: - if filename == 'main.html': + if filename == "main.html": continue path = os.path.join(root, filename) - if not filename.endswith('.html'): + if not filename.endswith(".html"): continue - logging.info('Processing %s', path) - with open(path, 'rb') as f: - content = f.read().decode('utf-8') + logging.info("Processing %s", path) + with open(path, "rb") as f: + content = f.read().decode("utf-8") template = env.from_string(content) content = template.render(args.__dict__) - with open(path, 'wb') as f: - f.write(content.encode('utf-8')) + with open(path, "wb") as f: + f.write(content.encode("utf-8")) def get_css_in(args): @@ -193,7 +199,7 @@ def get_css_in(args): f"'{args.website_dir}/css/blog.css'", f"'{args.website_dir}/css/docs.css'", f"'{args.website_dir}/css/highlight.css'", - f"'{args.website_dir}/css/main.css'" + f"'{args.website_dir}/css/main.css'", ] @@ -207,42 +213,41 @@ def get_js_in(args): f"'{args.website_dir}/js/index.js'", f"'{args.website_dir}/js/docsearch.js'", f"'{args.website_dir}/js/docs.js'", - f"'{args.website_dir}/js/main.js'" + f"'{args.website_dir}/js/main.js'", ] def minify_file(path, css_digest, js_digest): - if not ( - path.endswith('.html') or - path.endswith('.css') - ): + if not (path.endswith(".html") or path.endswith(".css")): return - logging.info('Minifying %s', path) - with open(path, 'rb') as f: - content = f.read().decode('utf-8') - if path.endswith('.html'): + logging.info("Minifying %s", path) + with open(path, "rb") as f: + content = f.read().decode("utf-8") + if path.endswith(".html"): content = minify_html(content) - content = content.replace('base.css?css_digest', f'base.css?{css_digest}') - content = content.replace('base.js?js_digest', f'base.js?{js_digest}') -# TODO: restore cssmin -# elif path.endswith('.css'): -# content = cssmin.cssmin(content) -# TODO: restore jsmin -# elif path.endswith('.js'): -# content = jsmin.jsmin(content) - with open(path, 'wb') as f: - f.write(content.encode('utf-8')) + content = content.replace("base.css?css_digest", f"base.css?{css_digest}") + content = content.replace("base.js?js_digest", f"base.js?{js_digest}") + # TODO: restore cssmin + # elif path.endswith('.css'): + # content = cssmin.cssmin(content) + # TODO: restore jsmin + # elif path.endswith('.js'): + # content = jsmin.jsmin(content) + with open(path, "wb") as f: + f.write(content.encode("utf-8")) def minify_website(args): - css_in = ' '.join(get_css_in(args)) - css_out = f'{args.output_dir}/docs/css/base.css' - os.makedirs(f'{args.output_dir}/docs/css') + css_in = " ".join(get_css_in(args)) + css_out = f"{args.output_dir}/docs/css/base.css" + os.makedirs(f"{args.output_dir}/docs/css") if args.minify and False: # TODO: return closure - command = f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " \ + command = ( + f"purifycss -w '*algolia*' 
--min {css_in} '{args.output_dir}/*.html' " f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}" + ) logging.info(css_in) logging.info(command) output = subprocess.check_output(command, shell=True) @@ -251,51 +256,60 @@ def minify_website(args): else: command = f"cat {css_in}" output = subprocess.check_output(command, shell=True) - with open(css_out, 'wb+') as f: + with open(css_out, "wb+") as f: f.write(output) - with open(css_out, 'rb') as f: + with open(css_out, "rb") as f: css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8] - js_in = ' '.join(get_js_in(args)) - js_out = f'{args.output_dir}/docs/js/base.js' - os.makedirs(f'{args.output_dir}/docs/js') + js_in = " ".join(get_js_in(args)) + js_out = f"{args.output_dir}/docs/js/base.js" + os.makedirs(f"{args.output_dir}/docs/js") if args.minify and False: # TODO: return closure js_in = [js[1:-1] for js in js_in] closure_args = [ - '--js', *js_in, '--js_output_file', js_out, - '--compilation_level', 'SIMPLE', - '--dependency_mode', 'NONE', - '--third_party', '--use_types_for_optimization', - '--isolation_mode', 'IIFE' + "--js", + *js_in, + "--js_output_file", + js_out, + "--compilation_level", + "SIMPLE", + "--dependency_mode", + "NONE", + "--third_party", + "--use_types_for_optimization", + "--isolation_mode", + "IIFE", ] logging.info(closure_args) if closure.run(*closure_args): - raise RuntimeError('failed to run closure compiler') - with open(js_out, 'r') as f: + raise RuntimeError("failed to run closure compiler") + with open(js_out, "r") as f: js_content = jsmin.jsmin(f.read()) - with open(js_out, 'w') as f: + with open(js_out, "w") as f: f.write(js_content) else: command = f"cat {js_in}" output = subprocess.check_output(command, shell=True) - with open(js_out, 'wb+') as f: + with open(js_out, "wb+") as f: f.write(output) - with open(js_out, 'rb') as f: + with open(js_out, "rb") as f: js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8] logging.info(js_digest) if args.minify: - logging.info('Minifying website') + logging.info("Minifying website") with concurrent.futures.ThreadPoolExecutor() as executor: futures = [] for root, _, filenames in os.walk(args.output_dir): for filename in filenames: path = os.path.join(root, filename) - futures.append(executor.submit(minify_file, path, css_digest, js_digest)) + futures.append( + executor.submit(minify_file, path, css_digest, js_digest) + ) for future in futures: exc = future.exception() if exc: @@ -304,24 +318,28 @@ def minify_website(args): def process_benchmark_results(args): - benchmark_root = os.path.join(args.website_dir, 'benchmark') + benchmark_root = os.path.join(args.website_dir, "benchmark") required_keys = { - 'dbms': ['result'], - 'hardware': ['result', 'system', 'system_full', 'kind'] + "dbms": ["result"], + "hardware": ["result", "system", "system_full", "kind"], } - for benchmark_kind in ['dbms', 'hardware']: + for benchmark_kind in ["dbms", "hardware"]: results = [] - results_root = os.path.join(benchmark_root, benchmark_kind, 'results') + results_root = os.path.join(benchmark_root, benchmark_kind, "results") for result in sorted(os.listdir(results_root)): result_file = os.path.join(results_root, result) - logging.debug(f'Reading benchmark result from {result_file}') - with open(result_file, 'r') as f: + logging.debug(f"Reading benchmark result from {result_file}") + with open(result_file, "r") as f: result = json.loads(f.read()) for item in result: for required_key in required_keys[benchmark_kind]: - assert required_key in item, f'No 
"{required_key}" in {result_file}' + assert ( + required_key in item + ), f'No "{required_key}" in {result_file}' results += result - results_js = os.path.join(args.output_dir, 'benchmark', benchmark_kind, 'results.js') - with open(results_js, 'w') as f: + results_js = os.path.join( + args.output_dir, "benchmark", benchmark_kind, "results.js" + ) + with open(results_js, "w") as f: data = json.dumps(results) - f.write(f'var results = {data};') + f.write(f"var results = {data};") diff --git a/docs/zh/development/continuous-integration.md b/docs/zh/development/continuous-integration.md index 4f37b6f88c7..5bebb3aec2a 100644 --- a/docs/zh/development/continuous-integration.md +++ b/docs/zh/development/continuous-integration.md @@ -42,6 +42,8 @@ git push 使用`utils/check-style/check-style`二进制文件执行一些简单的基于正则表达式的代码样式检查(注意, 它可以在本地运行). 如果失败, 按照[代码样式指南](./style.md)修复样式错误. +使用 [black](https://github.com/psf/black/) 檢查 python 代碼. + ### 报告详情 {#report-details} - [状态页示例](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html) - `docs_output.txt`记录了查结果错误(无效表格等), 空白页表示没有错误. [成功结果案例](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt) diff --git a/docs/zh/development/developer-instruction.md b/docs/zh/development/developer-instruction.md index bd7a197f926..7ade3ad57fb 100644 --- a/docs/zh/development/developer-instruction.md +++ b/docs/zh/development/developer-instruction.md @@ -259,7 +259,7 @@ ClickHouse的架构描述可以在此处查看:https://clickhouse.com/docs/en/ 即使工作尚未完成,也可以创建拉取请求。在这种情况下,请在标题的开头加上«WIP»(正在进行中),以便后续更改。这对于协同审查和讨论更改以及运行所有可用测试用例很有用。提供有关变更的简短描述很重要,这将在后续用于生成重新发布变更日志。 -Yandex成员一旦在您的拉取请求上贴上«可以测试»标签,就会开始测试。一些初始检查项(例如,代码类型)的结果会在几分钟内反馈。构建的检查结果将在半小时内完成。而主要的测试用例集结果将在一小时内报告给您。 +ClickHouse成员一旦在您的拉取请求上贴上«可以测试»标签,就会开始测试。一些初始检查项(例如,代码类型)的结果会在几分钟内反馈。构建的检查结果将在半小时内完成。而主要的测试用例集结果将在一小时内报告给您。 系统将分别为您的拉取请求准备ClickHouse二进制版本。若要检索这些构建信息,请在检查列表中单击« ClickHouse构建检查»旁边的«详细信息»链接。在这里,您会找到指向ClickHouse的.deb软件包的直接链接,此外,甚至可以将其部署在生产服务器上(如果您不担心)。 diff --git a/docs/zh/engines/table-engines/integrations/hive.md b/docs/zh/engines/table-engines/integrations/hive.md index aa2c82d902a..24e0834d2fc 100644 --- a/docs/zh/engines/table-engines/integrations/hive.md +++ b/docs/zh/engines/table-engines/integrations/hive.md @@ -140,7 +140,7 @@ CREATE TABLE test.test_orc `f_array_array_float` Array(Array(Float32)), `day` String ) -ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc') +ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc') PARTITION BY day ``` diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index b81d2206bf4..f5f2c428ea7 100644 --- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -8,7 +8,7 @@ toc_title: "版本折叠MergeTree" 这个引擎: - 允许快速写入不断变化的对象状态。 -- 删除后台中的旧对象状态。 这显着降低了存储体积。 +- 删除后台中的旧对象状态。 这显著降低了存储体积。 请参阅部分 [崩溃](#table_engines_versionedcollapsingmergetree) 有关详细信息。 diff --git a/docs/zh/getting-started/playground.md b/docs/zh/getting-started/playground.md index 33636c92829..f8f611d9d8d 100644 --- a/docs/zh/getting-started/playground.md +++ b/docs/zh/getting-started/playground.md @@ -3,62 +3,41 @@ toc_priority: 14 toc_title: 体验平台 --- -# ClickHouse体验平台 {#clickhouse-playground} +# ClickHouse Playground {#clickhouse-playground} -!!! 
warning "Warning" - This service is deprecated and will be replaced in foreseeable future. +[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. +Several example datasets are available in Playground. -[ClickHouse体验平台](https://play.clickhouse.com?file=welcome) 允许人们通过即时运行查询来尝试ClickHouse,而无需设置他们的服务器或集群。 - -体验平台中提供几个示例数据集以及显示ClickHouse特性的示例查询。还有一些ClickHouse LTS版本可供尝试。 - -您可以使用任何HTTP客户端对ClickHouse体验平台进行查询,例如[curl](https://curl.haxx.se)或者[wget](https://www.gnu.org/software/wget/),或使用[JDBC](../interfaces/jdbc.md)或者[ODBC](../interfaces/odbc.md)驱动连接。关于支持ClickHouse的软件产品的更多信息详见[here](../interfaces/index.md). +You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). ## Credentials {#credentials} -| 参数 | 值 | -|:--------------------|:----------------------------------------| -| HTTPS端点 | `https://play-api.clickhouse.com:8443` | -| TCP端点 | `play-api.clickhouse.com:9440` | -| 用户 | `playground` | -| 密码 | `clickhouse` | +| Parameter | Value | +|:--------------------|:-----------------------------------| +| HTTPS endpoint | `https://play.clickhouse.com:443/` | +| Native TCP endpoint | `play.clickhouse.com:9440` | +| User | `explorer` or `play` | +| Password | (empty) | -还有一些带有特定ClickHouse版本的附加信息来试验它们之间的差异(端口和用户/密码与上面相同): +## Limitations {#limitations} -- 20.3 LTS: `play-api-v20-3.clickhouse.com` -- 19.14 LTS: `play-api-v19-14.clickhouse.com` +The queries are executed as a read-only user. It implies some limitations: -!!! note "注意" - 所有这些端点都需要安全的TLS连接。 +- DDL queries are not allowed +- INSERT queries are not allowed -## 查询限制 {#limitations} +The service also have quotas on its usage. 
-查询以只读用户身份执行。 这意味着一些局限性: +## Examples {#examples} -- 不允许DDL查询 -- 不允许插入查询 - -还强制执行以下设置: -- [max_result_bytes=10485760](../operations/settings/query-complexity/#max-result-bytes) -- [max_result_rows=2000](../operations/settings/query-complexity/#setting-max_result_rows) -- [result_overflow_mode=break](../operations/settings/query-complexity/#result-overflow-mode) -- [max_execution_time=60000](../operations/settings/query-complexity/#max-execution-time) - -ClickHouse体验还有如下: -[ClickHouse管理服务](https://cloud.yandex.com/services/managed-clickhouse) -实例托管 [Yandex云](https://cloud.yandex.com/)。 -更多信息 [云提供商](../commercial/cloud.md)。 - -## 示例 {#examples} - -使用`curl`连接Https服务: +HTTPS endpoint example with `curl`: ``` bash -curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets" +curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'" ``` -TCP连接示例[CLI](../interfaces/cli.md): +TCP endpoint example with [CLI](../interfaces/cli.md): ``` bash -clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" +clickhouse client --secure --host play.clickhouse.com --user explorer ``` diff --git a/docs/zh/interfaces/formats.md b/docs/zh/interfaces/formats.md index 4327a657793..40e9bfe7ff1 100644 --- a/docs/zh/interfaces/formats.md +++ b/docs/zh/interfaces/formats.md @@ -1240,7 +1240,8 @@ SELECT * FROM topic1_stream; | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `STRING` | | — | [FixedString](../sql-reference/data-types/fixedstring.md) | `STRING` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | @@ -1295,7 +1296,8 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_ | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | | `-` | [Array](../sql-reference/data-types/array.md) | `LIST` | diff --git a/docs/zh/operations/configuration-files.md b/docs/zh/operations/configuration-files.md index 7998baafb6c..c99b8fcfca3 100644 --- a/docs/zh/operations/configuration-files.md +++ b/docs/zh/operations/configuration-files.md @@ -3,7 +3,7 @@ ClickHouse支持多配置文件管理。主配置文件是`/etc/clickhouse-server/config.xml`。其余文件须在目录`/etc/clickhouse-server/config.d`。 !!! 
注意: - 所有配置文件必须是XML格式。此外,配置文件须有相同的跟元素,通常是``。 + 所有配置文件必须是XML格式。此外,配置文件须有相同的根元素,通常是``。 主配置文件中的一些配置可以通过`replace`或`remove`属性被配置文件覆盖。 diff --git a/docs/zh/operations/system-tables/functions.md b/docs/zh/operations/system-tables/functions.md index 695c7b7fee1..75df1f65c1f 100644 --- a/docs/zh/operations/system-tables/functions.md +++ b/docs/zh/operations/system-tables/functions.md @@ -15,7 +15,7 @@ ``` ┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐ │ sumburConsistentHash │ 0 │ 0 │ │ -│ yandexConsistentHash │ 0 │ 0 │ │ +│ kostikConsistentHash │ 0 │ 0 │ │ │ demangle │ 0 │ 0 │ │ │ addressToLine │ 0 │ 0 │ │ │ JSONExtractRaw │ 0 │ 0 │ │ diff --git a/docs/zh/operations/system-tables/numbers_mt.md b/docs/zh/operations/system-tables/numbers_mt.md index 185bee95171..cf1c96acaab 100644 --- a/docs/zh/operations/system-tables/numbers_mt.md +++ b/docs/zh/operations/system-tables/numbers_mt.md @@ -1,10 +1,5 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.numbers_mt {#system-numbers-mt} -# 系统。numbers_mt {#system-numbers-mt} - -一样的 [系统。数字](../../operations/system-tables/numbers.md) 但读取是并行的。 这些数字可以以任何顺序返回。 +与[system.numbers](../../operations/system-tables/numbers.md)相似,但读取是并行的。 这些数字可以以任何顺序返回。 用于测试。 diff --git a/docs/zh/sql-reference/data-types/special-data-types/nothing.md b/docs/zh/sql-reference/data-types/special-data-types/nothing.md index 52c705522e5..64f656185c1 100644 --- a/docs/zh/sql-reference/data-types/special-data-types/nothing.md +++ b/docs/zh/sql-reference/data-types/special-data-types/nothing.md @@ -2,7 +2,7 @@ 此数据类型的唯一目的是表示不是期望值的情况。 所以不能创建一个 `Nothing` 类型的值。 -例如,文本 [NULL](../../../sql-reference/data-types/special-data-types/nothing.md#null-literal) 的类型为 `Nullable(Nothing)`。详情请见 [可为空](../../../sql-reference/data-types/special-data-types/nothing.md)。 +例如,字面量 [NULL](../../../sql-reference/syntax.md#null-literal) 的类型为 `Nullable(Nothing)`。详情请见 [可为空](../../../sql-reference/data-types/nullable.md)。 `Nothing` 类型也可以用来表示空数组: diff --git a/docs/zh/sql-reference/functions/ext-dict-functions.md b/docs/zh/sql-reference/functions/ext-dict-functions.md index 12b9499cb64..87e19dc0119 100644 --- a/docs/zh/sql-reference/functions/ext-dict-functions.md +++ b/docs/zh/sql-reference/functions/ext-dict-functions.md @@ -31,7 +31,7 @@ - 对于’dict_name’分层字典,查找’child_id’键是否位于’ancestor_id’内(或匹配’ancestor_id’)。返回UInt8。 -## 独裁主义 {#dictgethierarchy} +## dictGetHierarchy {#dictgethierarchy} `dictGetHierarchy('dict_name', id)` diff --git a/packages/.gitignore b/packages/.gitignore new file mode 100644 index 00000000000..355164c1265 --- /dev/null +++ b/packages/.gitignore @@ -0,0 +1 @@ +*/ diff --git a/packages/build b/packages/build new file mode 100755 index 00000000000..53a7538f80e --- /dev/null +++ b/packages/build @@ -0,0 +1,156 @@ +#!/usr/bin/env bash + +set -e + +# Avoid dependency on locale +LC_ALL=C + +# Normalize output directory +if [ -n "$OUTPUT_DIR" ]; then + OUTPUT_DIR=$(realpath -m "$OUTPUT_DIR") +fi + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +cd "$CUR_DIR" + +ROOT_DIR=$(readlink -f "$(git rev-parse --show-cdup)") + +PKG_ROOT='root' + +DEB_ARCH=${DEB_ARCH:-amd64} +OUTPUT_DIR=${OUTPUT_DIR:-$ROOT_DIR} +[ -d "${OUTPUT_DIR}" ] || mkdir -p "${OUTPUT_DIR}" +SANITIZER=${SANITIZER:-""} +SOURCE=${SOURCE:-$PKG_ROOT} + +HELP="${0} [--test] [--rpm] [-h|--help] + --test - adds '+test' prefix to version + --apk - build APK packages + --rpm - build RPM packages + --tgz - build tarball package + --help - show this help and exit + 
+Used envs: + DEB_ARCH='${DEB_ARCH}' + OUTPUT_DIR='${OUTPUT_DIR}' - where the artifact will be placed + SANITIZER='${SANITIZER}' - if any sanitizer is used, affects version string + SOURCE='${SOURCE}' - directory with sources tree + VERSION_STRING='${VERSION_STRING}' - the package version to overwrite +" + +if [ -z "${VERSION_STRING}" ]; then + # Get CLICKHOUSE_VERSION_STRING from the current git repo + eval "$("$ROOT_DIR/tests/ci/version_helper.py" -e)" +else + CLICKHOUSE_VERSION_STRING=${VERSION_STRING} +fi +export CLICKHOUSE_VERSION_STRING + + + +while [[ $1 == --* ]] +do + case "$1" in + --test ) + VERSION_POSTFIX+='+test' + shift ;; + --apk ) + MAKE_APK=1 + shift ;; + --rpm ) + MAKE_RPM=1 + shift ;; + --tgz ) + MAKE_TGZ=1 + shift ;; + --help ) + echo "$HELP" + exit ;; + * ) + echo "Unknown option $1" + exit 2 ;; + esac +done + +function deb2tgz { + local FILE PKG_NAME PKG_DIR PKG_PATH TARBALL + FILE=$1 + PKG_NAME=${FILE##*/}; PKG_NAME=${PKG_NAME%%_*} + PKG_DIR="$PKG_NAME-$CLICKHOUSE_VERSION_STRING" + PKG_PATH="$OUTPUT_DIR/$PKG_NAME-$CLICKHOUSE_VERSION_STRING" + TARBALL="$OUTPUT_DIR/$PKG_NAME-$CLICKHOUSE_VERSION_STRING-$DEB_ARCH.tgz" + rm -rf "$PKG_PATH" + dpkg-deb -R "$FILE" "$PKG_PATH" + mkdir -p "$PKG_PATH/install" + cat > "$PKG_PATH/install/doinst.sh" << 'EOF' +#!/bin/sh +set -e + +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +for filepath in `find $SCRIPTPATH/.. -type f -or -type l | grep -v "\.\./install/"`; do + destpath=${filepath##$SCRIPTPATH/..} + mkdir -p $(dirname "$destpath") + cp -r "$filepath" "$destpath" +done +EOF + chmod +x "$PKG_PATH/install/doinst.sh" + if [ -f "$PKG_PATH/DEBIAN/postinst" ]; then + tail +2 "$PKG_PATH/DEBIAN/postinst" > "$PKG_PATH/install/doinst.sh" + fi + rm -rf "$PKG_PATH/DEBIAN" + if [ -f "/usr/bin/pigz" ]; then + tar --use-compress-program=pigz -cf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR" + else + tar -czf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR" + fi + + rm -r "$PKG_PATH" +} + +# Build options +if [ -n "$SANITIZER" ]; then + if [[ "$SANITIZER" == "address" ]]; then VERSION_POSTFIX+="+asan" + elif [[ "$SANITIZER" == "thread" ]]; then VERSION_POSTFIX+="+tsan" + elif [[ "$SANITIZER" == "memory" ]]; then VERSION_POSTFIX+="+msan" + elif [[ "$SANITIZER" == "undefined" ]]; then VERSION_POSTFIX+="+ubsan" + else + echo "Unknown value of SANITIZER variable: $SANITIZER" + exit 3 + fi +elif [[ $BUILD_TYPE == 'debug' ]]; then + VERSION_POSTFIX+="+debug" +fi + +if [[ "$PKG_ROOT" != "$SOURCE" ]]; then + # packages are built only from PKG_SOURCE + rm -rf "./$PKG_ROOT" + ln -sf "$SOURCE" "$PKG_SOURCE" +fi + +CLICKHOUSE_VERSION_STRING+=$VERSION_POSTFIX +echo -e "\nCurrent version is $CLICKHOUSE_VERSION_STRING" + +for config in clickhouse*.yaml; do + echo "Building deb package for $config" + + # Preserve package path + exec 9>&1 + PKG_PATH=$(nfpm package --target "$OUTPUT_DIR" --config "$config" --packager deb | tee /dev/fd/9) + PKG_PATH=${PKG_PATH##*created package: } + exec 9>&- + + if [ -n "$MAKE_APK" ]; then + echo "Building apk package for $config" + nfpm package --target "$OUTPUT_DIR" --config "$config" --packager apk + fi + if [ -n "$MAKE_RPM" ]; then + echo "Building rpm package for $config" + nfpm package --target "$OUTPUT_DIR" --config "$config" --packager rpm + fi + if [ -n "$MAKE_TGZ" ]; then + echo "Building tarball for $config" + deb2tgz "$PKG_PATH" + fi +done + +# vim: ts=4: sw=4: sts=4: expandtab diff --git a/packages/clickhouse-client.yaml b/packages/clickhouse-client.yaml new file mode 100644 index 00000000000..2a1389b6625 --- /dev/null +++ 
b/packages/clickhouse-client.yaml @@ -0,0 +1,57 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-client" +arch: "all" +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc." +homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" + +replaces: +- clickhouse-compressor +conflicts: +- clickhouse-compressor + +maintainer: "ClickHouse Dev Team " +description: | + Client binary for ClickHouse + ClickHouse is a column-oriented database management system + that allows generating analytical data reports in real time. + This package provides clickhouse-client , clickhouse-local and clickhouse-benchmark + +overrides: + deb: + depends: + - clickhouse-common-static (= ${CLICKHOUSE_VERSION_STRING}) + rpm: + depends: + - clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING} + +contents: +- src: root/etc/clickhouse-client/config.xml + dst: /etc/clickhouse-client/config.xml + type: config +- src: root/usr/bin/clickhouse-benchmark + dst: /usr/bin/clickhouse-benchmark +- src: root/usr/bin/clickhouse-compressor + dst: /usr/bin/clickhouse-compressor +- src: root/usr/bin/clickhouse-format + dst: /usr/bin/clickhouse-format +- src: root/usr/bin/clickhouse-client + dst: /usr/bin/clickhouse-client +- src: root/usr/bin/clickhouse-local + dst: /usr/bin/clickhouse-local +- src: root/usr/bin/clickhouse-obfuscator + dst: /usr/bin/clickhouse-obfuscator +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-client/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-client/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-client/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-client/README.md diff --git a/packages/clickhouse-common-static-dbg.yaml b/packages/clickhouse-common-static-dbg.yaml new file mode 100644 index 00000000000..12a1594bd30 --- /dev/null +++ b/packages/clickhouse-common-static-dbg.yaml @@ -0,0 +1,38 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-common-static-dbg" +arch: "${DEB_ARCH}" # amd64, arm64 +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc." +homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" + +replaces: +- clickhouse-common-dbg +conflicts: +- clickhouse-common-dbg + +maintainer: "ClickHouse Dev Team " +description: | + debugging symbols for clickhouse-common-static + This package contains the debugging symbols for clickhouse-common. 
+ +contents: +- src: root/usr/lib/debug/usr/bin/clickhouse.debug + dst: /usr/lib/debug/usr/bin/clickhouse.debug +- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug + dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug +- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug + dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-common-static-dbg/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-common-static-dbg/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-common-static-dbg/README.md diff --git a/packages/clickhouse-common-static.yaml b/packages/clickhouse-common-static.yaml new file mode 100644 index 00000000000..269d4318e5e --- /dev/null +++ b/packages/clickhouse-common-static.yaml @@ -0,0 +1,48 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-common-static" +arch: "${DEB_ARCH}" # amd64, arm64 +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc." +homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" + +replaces: +- clickhouse-common +- clickhouse-server-base +provides: +- clickhouse-common +- clickhouse-server-base +suggests: +- clickhouse-common-static-dbg + +maintainer: "ClickHouse Dev Team " +description: | + Common files for ClickHouse + ClickHouse is a column-oriented database management system + that allows generating analytical data reports in real time. + This package provides common files for both clickhouse server and client + +contents: +- src: root/usr/bin/clickhouse + dst: /usr/bin/clickhouse +- src: root/usr/bin/clickhouse-odbc-bridge + dst: /usr/bin/clickhouse-odbc-bridge +- src: root/usr/bin/clickhouse-library-bridge + dst: /usr/bin/clickhouse-library-bridge +- src: root/usr/bin/clickhouse-extract-from-config + dst: /usr/bin/clickhouse-extract-from-config +- src: root/usr/share/bash-completion/completions + dst: /usr/share/bash-completion/completions +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-common-static/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-common-static/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-common-static/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-common-static/README.md diff --git a/packages/clickhouse-keeper-dbg.yaml b/packages/clickhouse-keeper-dbg.yaml new file mode 100644 index 00000000000..2c70b7ad4aa --- /dev/null +++ b/packages/clickhouse-keeper-dbg.yaml @@ -0,0 +1,28 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-keeper-dbg" +arch: "${DEB_ARCH}" # amd64, arm64 +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc." +homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" +maintainer: "ClickHouse Dev Team " +description: | + debugging symbols for clickhouse-keeper + This package contains the debugging symbols for clickhouse-keeper. 
+ +contents: +- src: root/usr/lib/debug/usr/bin/clickhouse-keeper.debug + dst: /usr/lib/debug/usr/bin/clickhouse-keeper.debug +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-keeper-dbg/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-keeper-dbg/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-keeper-dbg/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-keeper-dbg/README.md diff --git a/packages/clickhouse-keeper.yaml b/packages/clickhouse-keeper.yaml new file mode 100644 index 00000000000..e99ac30f944 --- /dev/null +++ b/packages/clickhouse-keeper.yaml @@ -0,0 +1,40 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-keeper" +arch: "${DEB_ARCH}" # amd64, arm64 +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc." +homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" + +conflicts: +- clickhouse-server +depends: +- adduser +suggests: +- clickhouse-keeper-dbg + +maintainer: "ClickHouse Dev Team " +description: | + Static clickhouse-keeper binary + A stand-alone clickhouse-keeper package + + +contents: +- src: root/etc/clickhouse-keeper + dst: /etc/clickhouse-keeper + type: config +- src: root/usr/bin/clickhouse-keeper + dst: /usr/bin/clickhouse-keeper +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-keeper/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-keeper/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-keeper/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-keeper/README.md diff --git a/debian/clickhouse-server.init b/packages/clickhouse-server.init similarity index 100% rename from debian/clickhouse-server.init rename to packages/clickhouse-server.init diff --git a/debian/clickhouse-server.postinst b/packages/clickhouse-server.postinstall similarity index 100% rename from debian/clickhouse-server.postinst rename to packages/clickhouse-server.postinstall diff --git a/debian/clickhouse-server.service b/packages/clickhouse-server.service similarity index 91% rename from debian/clickhouse-server.service rename to packages/clickhouse-server.service index a9400b24270..028b4fbf8ab 100644 --- a/debian/clickhouse-server.service +++ b/packages/clickhouse-server.service @@ -20,7 +20,7 @@ ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml EnvironmentFile=-/etc/default/clickhouse LimitCORE=infinity LimitNOFILE=500000 -CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE +CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE [Install] # ClickHouse should not start from the rescue shell (rescue.target). diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml new file mode 100644 index 00000000000..ed56eb27e54 --- /dev/null +++ b/packages/clickhouse-server.yaml @@ -0,0 +1,68 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-server" +arch: "all" +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc." 
+homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" + +conflicts: +- clickhouse-keeper +depends: +- adduser +replaces: +- clickhouse-server-common +- clickhouse-server-base +provides: +- clickhouse-server-common +recommends: +- libcap2-bin + +maintainer: "ClickHouse Dev Team " +description: | + Server binary for ClickHouse + ClickHouse is a column-oriented database management system + that allows generating analytical data reports in real time. + This package provides clickhouse common configuration files + +overrides: + deb: + depends: + - clickhouse-common-static (= ${CLICKHOUSE_VERSION_STRING}) + rpm: + depends: + - clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING} + +contents: +- src: root/etc/clickhouse-server + dst: /etc/clickhouse-server + type: config +- src: clickhouse-server.init + dst: /etc/init.d/clickhouse-server +- src: clickhouse-server.service + dst: /lib/systemd/system/clickhouse-server.service +- src: root/usr/bin/clickhouse-copier + dst: /usr/bin/clickhouse-copier +- src: clickhouse + dst: /usr/bin/clickhouse-keeper + type: symlink +- src: root/usr/bin/clickhouse-report + dst: /usr/bin/clickhouse-report +- src: root/usr/bin/clickhouse-server + dst: /usr/bin/clickhouse-server +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-server/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-server/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-server/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-server/README.md + +scripts: + postinstall: ./clickhouse-server.postinstall diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 0890b9c95d3..cca7be97b61 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -460,10 +460,6 @@ else () list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter) endif () - if (NOT BUILD_STRIPPED_BINARIES_PREFIX) - install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - endif() - add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE}) if (USE_GDB_ADD_INDEX) @@ -474,13 +470,14 @@ else () add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM) endif() - if (BUILD_STRIPPED_BINARIES_PREFIX) - clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH clickhouse) + if (INSTALL_STRIPPED_BINARIES) + clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse) + else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT}) + install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() endif() - - if (ENABLE_TESTS) set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms) add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS}) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index f3e7f1775b8..a34ce02b293 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -163,10 +163,24 @@ void Client::initialize(Poco::Util::Application & self) configReadClient(config(), home_path); + /** getenv is thread-safe in Linux glibc and in all sane libc implementations. 
+ * But the standard does not guarantee that subsequent calls will not rewrite the value by returned pointer. + * + * man getenv: + * + * As typically implemented, getenv() returns a pointer to a string within the environment list. + * The caller must take care not to modify this string, since that would change the environment of + * the process. + * + * The implementation of getenv() is not required to be reentrant. The string pointed to by the return value of getenv() + * may be statically allocated, and can be modified by a subsequent call to getenv(), putenv(3), setenv(3), or unsetenv(3). + */ + const char * env_user = getenv("CLICKHOUSE_USER"); - const char * env_password = getenv("CLICKHOUSE_PASSWORD"); if (env_user) config().setString("user", env_user); + + const char * env_password = getenv("CLICKHOUSE_PASSWORD"); if (env_password) config().setString("password", env_password); @@ -810,7 +824,7 @@ void Client::addOptions(OptionsDescription & options_description) ("quota_key", po::value(), "A string to differentiate quotas when the user have keyed quotas configured on server") ("max_client_network_bandwidth", po::value(), "the maximum speed of data exchange over the network for the client in bytes per second.") - ("compression", po::value(), "enable or disable compression") + ("compression", po::value(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).") ("query-fuzzer-runs", po::value()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.") ("interleave-queries-file", po::value>()->multitoken(), @@ -820,6 +834,7 @@ void Client::addOptions(OptionsDescription & options_description) ("opentelemetry-tracestate", po::value(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation") ("no-warnings", "disable warnings when client connects to server") + ("fake-drop", "Ignore all DROP queries, should be used only for testing") ; /// Commandline options related to external tables. @@ -952,6 +967,8 @@ void Client::processOptions(const OptionsDescription & options_description, config().setBool("compression", options["compression"].as()); if (options.count("no-warnings")) config().setBool("no-warnings", true); + if (options.count("fake-drop")) + fake_drop = true; if ((query_fuzzer_runs = options["query-fuzzer-runs"].as())) { @@ -1002,6 +1019,7 @@ void Client::processConfig() global_context->setCurrentQueryId(query_id); } print_stack_trace = config().getBool("stacktrace", false); + logging_initialized = true; if (config().has("multiquery")) is_multiquery = true; diff --git a/programs/compressor/Compressor.cpp b/programs/compressor/Compressor.cpp index d47372631fe..d0fc3528473 100644 --- a/programs/compressor/Compressor.cpp +++ b/programs/compressor/Compressor.cpp @@ -66,40 +66,40 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) using namespace DB; namespace po = boost::program_options; - po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); - desc.add_options() - ("help,h", "produce help message") - ("input", po::value()->value_name("INPUT"), "input file") - ("output", po::value()->value_name("OUTPUT"), "output file") - ("decompress,d", "decompress") - ("offset-in-compressed-file", po::value()->default_value(0ULL), "offset to the compressed block (i.e. 
physical file offset)") - ("offset-in-decompressed-block", po::value()->default_value(0ULL), "offset to the decompressed block (i.e. virtual offset)") - ("block-size,b", po::value()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size") - ("hc", "use LZ4HC instead of LZ4") - ("zstd", "use ZSTD instead of LZ4") - ("codec", po::value>()->multitoken(), "use codecs combination instead of LZ4") - ("level", po::value(), "compression level for codecs specified via flags") - ("none", "use no compression instead of LZ4") - ("stat", "print block statistics of compressed data") - ; - - po::positional_options_description positional_desc; - positional_desc.add("input", 1); - positional_desc.add("output", 1); - - po::variables_map options; - po::store(po::command_line_parser(argc, argv).options(desc).positional(positional_desc).run(), options); - - if (options.count("help")) - { - std::cout << "Usage: " << argv[0] << " [options] < INPUT > OUTPUT" << std::endl; - std::cout << "Usage: " << argv[0] << " [options] INPUT OUTPUT" << std::endl; - std::cout << desc << std::endl; - return 0; - } - try { + po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); + desc.add_options() + ("help,h", "produce help message") + ("input", po::value()->value_name("INPUT"), "input file") + ("output", po::value()->value_name("OUTPUT"), "output file") + ("decompress,d", "decompress") + ("offset-in-compressed-file", po::value()->default_value(0ULL), "offset to the compressed block (i.e. physical file offset)") + ("offset-in-decompressed-block", po::value()->default_value(0ULL), "offset to the decompressed block (i.e. virtual offset)") + ("block-size,b", po::value()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size") + ("hc", "use LZ4HC instead of LZ4") + ("zstd", "use ZSTD instead of LZ4") + ("codec", po::value>()->multitoken(), "use codecs combination instead of LZ4") + ("level", po::value(), "compression level for codecs specified via flags") + ("none", "use no compression instead of LZ4") + ("stat", "print block statistics of compressed data") + ; + + po::positional_options_description positional_desc; + positional_desc.add("input", 1); + positional_desc.add("output", 1); + + po::variables_map options; + po::store(po::command_line_parser(argc, argv).options(desc).positional(positional_desc).run(), options); + + if (options.count("help")) + { + std::cout << "Usage: " << argv[0] << " [options] < INPUT > OUTPUT" << std::endl; + std::cout << "Usage: " << argv[0] << " [options] INPUT OUTPUT" << std::endl; + std::cout << desc << std::endl; + return 0; + } + bool decompress = options.count("decompress"); bool use_lz4hc = options.count("hc"); bool use_zstd = options.count("zstd"); diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 835afcdb2ed..d5206da00f5 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -44,45 +44,47 @@ int mainEntryClickHouseFormat(int argc, char ** argv) { using namespace DB; - boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); - desc.add_options() - ("query", po::value(), "query to format") - ("help,h", "produce help message") - ("hilite", "add syntax highlight with ANSI terminal escape sequences") - ("oneline", "format in single line") - ("quiet,q", "just check syntax, no output on success") - ("multiquery,n", "allow multiple queries in the same file") - ("obfuscate", "obfuscate instead of formatting") 
- ("backslash", "add a backslash at the end of each line of the formatted query") - ("seed", po::value(), "seed (arbitrary string) that determines the result of obfuscation") - ; - - Settings cmd_settings; - for (const auto & field : cmd_settings.all()) - { - if (field.getName() == "max_parser_depth" || field.getName() == "max_query_size") - cmd_settings.addProgramOption(desc, field); - } - - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); - po::notify(options); - - if (options.count("help")) - { - std::cout << "Usage: " << argv[0] << " [options] < query" << std::endl; - std::cout << desc << std::endl; - return 1; - } - try { + boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); + desc.add_options() + ("query", po::value(), "query to format") + ("help,h", "produce help message") + ("hilite", "add syntax highlight with ANSI terminal escape sequences") + ("oneline", "format in single line") + ("quiet,q", "just check syntax, no output on success") + ("multiquery,n", "allow multiple queries in the same file") + ("obfuscate", "obfuscate instead of formatting") + ("backslash", "add a backslash at the end of each line of the formatted query") + ("allow_settings_after_format_in_insert", "Allow SETTINGS after FORMAT, but note, that this is not always safe") + ("seed", po::value(), "seed (arbitrary string) that determines the result of obfuscation") + ; + + Settings cmd_settings; + for (const auto & field : cmd_settings.all()) + { + if (field.getName() == "max_parser_depth" || field.getName() == "max_query_size") + cmd_settings.addProgramOption(desc, field); + } + + boost::program_options::variables_map options; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + po::notify(options); + + if (options.count("help")) + { + std::cout << "Usage: " << argv[0] << " [options] < query" << std::endl; + std::cout << desc << std::endl; + return 1; + } + bool hilite = options.count("hilite"); bool oneline = options.count("oneline"); bool quiet = options.count("quiet"); bool multiple = options.count("multiquery"); bool obfuscate = options.count("obfuscate"); bool backslash = options.count("backslash"); + bool allow_settings_after_format_in_insert = options.count("allow_settings_after_format_in_insert"); if (quiet && (hilite || oneline || obfuscate)) { @@ -154,7 +156,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) const char * pos = query.data(); const char * end = pos + query.size(); - ParserQuery parser(end); + ParserQuery parser(end, allow_settings_after_format_in_insert); do { ASTPtr res = parseQueryAndMovePosition( diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index 749dcbfee5f..18a9bb2627c 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -1231,5 +1231,5 @@ try catch (...) { std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; - throw; + return DB::getCurrentExceptionCode(); } diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index f8df823ecb7..5dec09ea901 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -792,9 +792,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv) fmt::print("Setting capabilities for clickhouse binary. 
This is optional.\n"); std::string command = fmt::format("command -v setcap >/dev/null" " && command -v capsh >/dev/null" - " && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice+ep >/dev/null 2>&1" - " && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {0}" - " || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary." + " && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice,cap_net_bind_service+ep >/dev/null 2>&1" + " && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice,cap_net_bind_service+ep' {0}" + " || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' or 'net_bind_service' capability for clickhouse binary." " This is optional. Taskstats accounting will be disabled." " To enable taskstats accounting you may add the required capability later manually.\"", fs::canonical(main_bin_path).string()); diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 92bb5dc45a3..b82b13d9607 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -71,17 +71,11 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDelta.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDoubleDelta.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecGorilla.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecT64.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/getCompressionCodecForFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp @@ -137,5 +131,10 @@ if (BUILD_STANDALONE_KEEPER) add_dependencies(clickhouse-keeper clickhouse_keeper_configs) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) - install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + if (INSTALL_STRIPPED_BINARIES) + clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper) + else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) + install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + endif() endif() diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index d7e104685c5..90ce3d8be7f 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -24,10 +24,9 @@ target_link_libraries(clickhouse-library-bridge PRIVATE set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) 
-if (BUILD_STRIPPED_BINARIES_PREFIX) - clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH ../clickhouse-library-bridge) -endif() - -if (NOT BUILD_STRIPPED_BINARIES_PREFIX) +if (INSTALL_STRIPPED_BINARIES) + clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge) +else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 8afb9c663a3..18b62e65765 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -184,6 +184,11 @@ void LocalServer::tryInitPath() if (path.back() != '/') path += '/'; + fs::create_directories(fs::path(path) / "user_defined/"); + fs::create_directories(fs::path(path) / "data/"); + fs::create_directories(fs::path(path) / "metadata/"); + fs::create_directories(fs::path(path) / "metadata_dropped/"); + global_context->setPath(path); global_context->setTemporaryStorage(path + "tmp"); @@ -304,8 +309,8 @@ void LocalServer::setupUsers() ConfigurationPtr users_config; auto & access_control = global_context->getAccessControl(); - access_control.setPlaintextPasswordSetting(config().getBool("allow_plaintext_password", true)); - access_control.setNoPasswordSetting(config().getBool("allow_no_password", true)); + access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true)); + access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true)); if (config().has("users_config") || config().has("config-file") || fs::exists("config.xml")) { const auto users_config_path = config().getString("users_config", config().getString("config-file", "config.xml")); @@ -429,6 +434,14 @@ catch (...) return getCurrentExceptionCode(); } +void LocalServer::updateLoggerLevel(const String & logs_level) +{ + if (!logging_initialized) + return; + + config().setString("logger.level", logs_level); + updateLevels(config(), logger()); +} void LocalServer::processConfig() { @@ -455,30 +468,31 @@ void LocalServer::processConfig() auto logging = (config().has("logger.console") || config().has("logger.level") || config().has("log-level") + || config().has("send_logs_level") || config().has("logger.log")); - auto file_logging = config().has("server_logs_file"); - if (is_interactive && logging && !file_logging) - throw Exception("For interactive mode logging is allowed only with --server_logs_file option", - ErrorCodes::BAD_ARGUMENTS); + auto level = config().getString("log-level", "trace"); - if (file_logging) + if (config().has("server_logs_file")) { - auto level = Poco::Logger::parseLevel(config().getString("log-level", "trace")); - Poco::Logger::root().setLevel(level); + auto poco_logs_level = Poco::Logger::parseLevel(level); + Poco::Logger::root().setLevel(poco_logs_level); Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::SimpleFileChannel(server_logs_file))); + logging_initialized = true; } - else if (logging) + else if (logging || is_interactive) { - // force enable logging config().setString("logger", "logger"); - // sensitive data rules are not used here + auto log_level_default = is_interactive && !logging ? 
"none" : level; + config().setString("logger.level", config().getString("log-level", config().getString("send_logs_level", log_level_default))); buildLoggers(config(), logger(), "clickhouse-local"); + logging_initialized = true; } else { Poco::Logger::root().setLevel("none"); Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::NullChannel())); + logging_initialized = false; } shared_context = Context::createShared(); @@ -565,7 +579,6 @@ void LocalServer::processConfig() /// Lock path directory before read status.emplace(fs::path(path) / "status", StatusFile::write_full_info); - fs::create_directories(fs::path(path) / "user_defined/"); LOG_DEBUG(log, "Loading user defined objects from {}", path); Poco::File(path + "user_defined/").createDirectories(); UserDefinedSQLObjectsLoader::instance().loadObjects(global_context); @@ -573,9 +586,6 @@ void LocalServer::processConfig() LOG_DEBUG(log, "Loaded user defined objects."); LOG_DEBUG(log, "Loading metadata from {}", path); - fs::create_directories(fs::path(path) / "data/"); - fs::create_directories(fs::path(path) / "metadata/"); - loadMetadataSystem(global_context); attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE)); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA)); @@ -712,6 +722,8 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp config().setString("logger.log", options["logger.log"].as()); if (options.count("logger.level")) config().setString("logger.level", options["logger.level"].as()); + if (options.count("send_logs_level")) + config().setString("send_logs_level", options["send_logs_level"].as()); } } diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index 969af7f1b77..e96fb211554 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -46,6 +46,8 @@ protected: void processConfig() override; + void updateLoggerLevel(const String & logs_level) override; + private: /** Composes CREATE subquery based on passed arguments (--structure --file --table and --input-format) * This query will be executed first, before queries passed through --query argument diff --git a/programs/main.cpp b/programs/main.cpp index 2cdda075ca7..62f2f2f3150 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -13,6 +13,8 @@ #include #include /// pair +#include + #include "config_tools.h" #include @@ -332,6 +334,20 @@ struct Checker #endif ; +void checkHarmfulEnvironmentVariables() +{ + /// The list is a selection from "man ld-linux". And one variable that is Mac OS X specific. + /// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. + for (const auto * var : {"LD_PRELOAD", "LD_LIBRARY_PATH", "LD_ORIGIN_PATH", "LD_AUDIT", "LD_DYNAMIC_WEAK", "DYLD_INSERT_LIBRARIES"}) + { + if (const char * value = getenv(var); value && value[0]) + { + std::cerr << fmt::format("Environment variable {} is set to {}. It can compromise security.\n", var, value); + _exit(1); + } + } +} + } @@ -352,6 +368,8 @@ int main(int argc_, char ** argv_) inside_main = true; SCOPE_EXIT({ inside_main = false; }); + checkHarmfulEnvironmentVariables(); + /// Reset new handler to default (that throws std::bad_alloc) /// It is needed because LLVM library clobbers it. 
std::set_new_handler(nullptr); diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 44493d7ab8a..b530e08ca26 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -39,11 +39,10 @@ if (USE_GDB_ADD_INDEX) add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM) endif() -if (BUILD_STRIPPED_BINARIES_PREFIX) - clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH ../clickhouse-odbc-bridge) -endif() - -if (NOT BUILD_STRIPPED_BINARIES_PREFIX) +if (INSTALL_STRIPPED_BINARIES) + clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge) +else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7b81220a4c1..7c133e5be47 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +46,8 @@ #include #include #include +#include +#include #include #include #include @@ -79,6 +82,7 @@ #include #include #include +#include #include #include #include @@ -514,6 +518,101 @@ void checkForUsersNotInMainConfig( } } +/// Unused in other builds +#if defined(OS_LINUX) +static String readString(const String & path) +{ + ReadBufferFromFile in(path); + String contents; + readStringUntilEOF(contents, in); + return contents; +} + +static int readNumber(const String & path) +{ + ReadBufferFromFile in(path); + int result; + readText(result, in); + return result; +} + +#endif + +static void sanityChecks(Server * server) +{ + std::string data_path = getCanonicalPath(server->config().getString("path", DBMS_DEFAULT_PATH)); + std::string logs_path = server->config().getString("logger.log", ""); + +#if defined(OS_LINUX) + try + { + if (readString("/sys/devices/system/clocksource/clocksource0/current_clocksource").find("tsc") == std::string::npos) + server->context()->addWarningMessage("Linux is not using fast TSC clock source. Performance can be degraded."); + } + catch (...) + { + } + + try + { + if (readNumber("/proc/sys/vm/overcommit_memory") == 2) + server->context()->addWarningMessage("Linux memory overcommit is disabled."); + } + catch (...) + { + } + + try + { + if (readString("/sys/kernel/mm/transparent_hugepage/enabled").find("[always]") != std::string::npos) + server->context()->addWarningMessage("Linux transparent hugepage are set to \"always\"."); + } + catch (...) + { + } + + try + { + if (readNumber("/proc/sys/kernel/pid_max") < 30000) + server->context()->addWarningMessage("Linux max PID is too low."); + } + catch (...) + { + } + + try + { + if (readNumber("/proc/sys/kernel/threads-max") < 30000) + server->context()->addWarningMessage("Linux threads max count is too low."); + } + catch (...) 
+ { + } + + std::string dev_id = getBlockDeviceId(data_path); + if (getBlockDeviceType(dev_id) == BlockDeviceType::ROT && getBlockDeviceReadAheadBytes(dev_id) == 0) + server->context()->addWarningMessage("Rotational disk with disabled readahead is in use. Performance can be degraded."); +#endif + + try + { + if (getAvailableMemoryAmount() < (2l << 30)) + server->context()->addWarningMessage("Available memory at server startup is too low (2GiB)."); + + if (!enoughSpaceInDirectory(data_path, 1ull << 30)) + server->context()->addWarningMessage("Available disk space at server startup is too low (1GiB)."); + + if (!logs_path.empty()) + { + if (!enoughSpaceInDirectory(fs::path(logs_path).parent_path(), 1ull << 30)) + server->context()->addWarningMessage("Available disk space at server startup is too low (1GiB)."); + } + } + catch (...) + { + } +} + int Server::main(const std::vector & /*args*/) { Poco::Logger * log = &logger(); @@ -547,13 +646,14 @@ int Server::main(const std::vector & /*args*/) global_context->addWarningMessage("Server was built in debug mode. It will work slowly."); #endif -if (ThreadFuzzer::instance().isEffective()) - global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable."); + if (ThreadFuzzer::instance().isEffective()) + global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable."); #if defined(SANITIZER) global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); #endif + sanityChecks(this); // Initialize global thread pool. Do it before we fetch configs from zookeeper // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will @@ -564,6 +664,10 @@ if (ThreadFuzzer::instance().isEffective()) config().getUInt("thread_pool_queue_size", 10000) ); + IOThreadPool::initialize( + config().getUInt("max_io_thread_pool_size", 100), + config().getUInt("max_io_thread_pool_free_size", 0), + config().getUInt("io_thread_pool_queue_size", 10000)); /// Initialize global local cache for remote filesystem. if (config().has("local_cache_for_remote_fs")) @@ -771,6 +875,38 @@ if (ThreadFuzzer::instance().isEffective()) } } + /// Try to increase limit on number of threads. + { + rlimit rlim; + if (getrlimit(RLIMIT_NPROC, &rlim)) + throw Poco::Exception("Cannot getrlimit"); + + if (rlim.rlim_cur == rlim.rlim_max) + { + LOG_DEBUG(log, "rlimit on number of threads is {}", rlim.rlim_cur); + } + else + { + rlim_t old = rlim.rlim_cur; + rlim.rlim_cur = rlim.rlim_max; + int rc = setrlimit(RLIMIT_NPROC, &rlim); + if (rc != 0) + { + LOG_WARNING(log, "Cannot set max number of threads to {}. error: {}", rlim.rlim_cur, strerror(errno)); + rlim.rlim_cur = old; + } + else + { + LOG_DEBUG(log, "Set max number of threads to {} (was {}).", rlim.rlim_cur, old); + } + } + + if (rlim.rlim_cur < 30000) + { + global_context->addWarningMessage("Maximum number of threads is lower than 30000. 
There could be problems with handling a lot of simultaneous queries."); + } + } + static ServerErrorHandler error_handler; Poco::ErrorHandler::set(&error_handler); @@ -834,6 +970,36 @@ if (ThreadFuzzer::instance().isEffective()) fs::create_directories(path / "metadata_dropped/"); } +#if USE_ROCKSDB + /// Initialize merge tree metadata cache + if (config().has("merge_tree_metadata_cache")) + { + fs::create_directories(path / "rocksdb/"); + size_t size = config().getUInt64("merge_tree_metadata_cache.lru_cache_size", 256 << 20); + bool continue_if_corrupted = config().getBool("merge_tree_metadata_cache.continue_if_corrupted", false); + try + { + LOG_DEBUG( + log, "Initiailizing merge tree metadata cache lru_cache_size:{} continue_if_corrupted:{}", size, continue_if_corrupted); + global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size); + } + catch (...) + { + if (continue_if_corrupted) + { + /// Rename rocksdb directory and reinitialize merge tree metadata cache + time_t now = time(nullptr); + fs::rename(path / "rocksdb", path / ("rocksdb.old." + std::to_string(now))); + global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size); + } + else + { + throw; + } + } + } +#endif + if (config().has("interserver_http_port") && config().has("interserver_https_port")) throw Exception("Both http and https interserver ports are specified", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); @@ -1033,8 +1199,8 @@ if (ThreadFuzzer::instance().isEffective()) std::make_unique( new KeeperTCPHandlerFactory( config_getter, global_context->getKeeperDispatcher(), - global_context->getSettingsRef().receive_timeout, - global_context->getSettingsRef().send_timeout, + global_context->getSettingsRef().receive_timeout.totalSeconds(), + global_context->getSettingsRef().send_timeout.totalSeconds(), false), server_pool, socket)); }); @@ -1056,8 +1222,8 @@ if (ThreadFuzzer::instance().isEffective()) std::make_unique( new KeeperTCPHandlerFactory( config_getter, global_context->getKeeperDispatcher(), - global_context->getSettingsRef().receive_timeout, - global_context->getSettingsRef().send_timeout, true), server_pool, socket)); + global_context->getSettingsRef().receive_timeout.totalSeconds(), + global_context->getSettingsRef().send_timeout.totalSeconds(), true), server_pool, socket)); #else UNUSED(port); throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", @@ -1080,9 +1246,10 @@ if (ThreadFuzzer::instance().isEffective()) auto & access_control = global_context->getAccessControl(); if (config().has("custom_settings_prefixes")) access_control.setCustomSettingsPrefixes(config().getString("custom_settings_prefixes")); - ///set the allow_plaintext_and_no_password setting in context. - access_control.setPlaintextPasswordSetting(config().getBool("allow_plaintext_password", true)); - access_control.setNoPasswordSetting(config().getBool("allow_no_password", true)); + + access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true)); + access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true)); + /// Initialize access storages. 
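
Note on the merge tree metadata cache hunk above: it initializes a RocksDB directory and, when merge_tree_metadata_cache.continue_if_corrupted is set, renames the damaged directory with a timestamp suffix and retries instead of refusing to start. A minimal sketch of that recovery pattern in isolation (illustrative only; the function name is hypothetical and `init` stands in for initializeMergeTreeMetadataCache):

#include <ctime>
#include <filesystem>
#include <functional>
#include <string>

namespace fs = std::filesystem;

/// Try to open the cache; on failure, optionally move the old data aside and retry.
void initializeWithRecovery(const fs::path & dir, bool continue_if_corrupted,
                            const std::function<void(const fs::path &)> & init)
{
    try
    {
        init(dir);
    }
    catch (...)
    {
        if (!continue_if_corrupted)
            throw;

        /// Keep the corrupted data for post-mortem analysis instead of deleting it.
        fs::rename(dir, dir.string() + ".old." + std::to_string(std::time(nullptr)));
        fs::create_directories(dir);
        init(dir);
    }
}

The design choice here is that corruption of an auxiliary cache should not make the server unbootable, while the renamed directory preserves the evidence needed to investigate the corruption later.
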
try { @@ -1347,7 +1514,8 @@ if (ThreadFuzzer::instance().isEffective()) else { /// Initialize a watcher periodically updating DNS cache - dns_cache_updater = std::make_unique(global_context, config().getInt("dns_cache_update_period", 15)); + dns_cache_updater = std::make_unique( + global_context, config().getInt("dns_cache_update_period", 15), config().getUInt("dns_max_consecutive_failures", 5)); } #if defined(OS_LINUX) @@ -1482,6 +1650,8 @@ if (ThreadFuzzer::instance().isEffective()) server.start(); LOG_INFO(log, "Listening for {}", server.getDescription()); } + + global_context->setServerCompletelyStarted(); LOG_INFO(log, "Ready for connections."); } @@ -1557,6 +1727,7 @@ if (ThreadFuzzer::instance().isEffective()) return Application::EXIT_OK; } + void Server::createServers( Poco::Util::AbstractConfiguration & config, const Strings & listen_hosts, diff --git a/programs/server/config.xml b/programs/server/config.xml index 9e741e50605..e4d74a60177 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -148,13 +148,13 @@ - + none true true @@ -372,7 +372,7 @@ /var/lib/clickhouse/tmp/ - + ` @@ -769,14 +769,14 @@ --> - + + - - - - - - - + + + system + processors_profile_log
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </processors_profile_log>
+
+    <!-- Uncomment if enable merge tree metadata cache -->
+    <!--merge_tree_metadata_cache>
+        <lru_cache_size>268435456</lru_cache_size>
+        <continue_if_corrupted>true</continue_if_corrupted>
+    </merge_tree_metadata_cache-->

diff --git a/programs/server/config.yaml.example b/programs/server/config.yaml.example index 74e4967c606..9f474b4ff4c 100644 --- a/programs/server/config.yaml.example +++ b/programs/server/config.yaml.example @@ -103,7 +103,7 @@ interserver_http_port: 9009 # If not specified, than it is determined analogous to 'hostname -f' command. # This setting could be used to switch replication to another network interface # (the server may be connected to multiple networks via multiple addresses) -# interserver_http_host: example.yandex.ru +# interserver_http_host: example.clickhouse.com # You can specify credentials for authenthication between replicas. # This is required when interserver_https_port is accessible from untrusted networks, @@ -592,10 +592,10 @@ remote_servers: # remote_url_allow_hosts: # Host should be specified exactly as in URL. The name is checked before DNS resolution. -# Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts. +# Example: "clickhouse.com", "clickhouse.com." and "www.clickhouse.com" are different hosts. # If port is explicitly specified in URL, the host:port is checked as a whole. # If host specified here without port, any port with this host allowed. -# "yandex.ru" -> "yandex.ru:443", "yandex.ru:80" etc. is allowed, but "yandex.ru:80" -> only "yandex.ru:80" is allowed. +# "clickhouse.com" -> "clickhouse.com:443", "clickhouse.com:80" etc. is allowed, but "clickhouse.com:80" -> only "clickhouse.com:80" is allowed. # If the host is specified as IP address, it is checked as specified in URL. Example: "[2a02:6b8:a::a]". # If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked. @@ -803,16 +803,6 @@ crash_log: partition_by: '' flush_interval_milliseconds: 1000 -# Parameters for embedded dictionaries, used in Yandex.Metrica. -# See https://clickhouse.com/docs/en/dicts/internal_dicts/ - -# Path to file with region hierarchy. -# path_to_regions_hierarchy_file: /opt/geo/regions_hierarchy.txt - -# Path to directory with files containing names of regions -# path_to_regions_names_files: /opt/geo/ - - # top_level_domains_path: /var/lib/clickhouse/top_level_domains/ # Custom TLD lists. 
# Format: name: /path/to/file diff --git a/programs/server/play.html b/programs/server/play.html index 08934196f67..146316ef61f 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -266,12 +266,25 @@ color: var(--null-color); } + @keyframes hourglass-animation { + 0% { + transform: rotate(-180deg); + } + 50% { + transform: rotate(-180deg); + } + 100% { + transform: none; + } + } + #hourglass { display: none; - padding-left: 1rem; + margin-left: 1rem; font-size: 110%; color: #888; + animation: hourglass-animation 1s linear infinite; } #check-mark @@ -457,7 +470,7 @@ } document.getElementById('check-mark').style.display = 'none'; - document.getElementById('hourglass').style.display = 'inline'; + document.getElementById('hourglass').style.display = 'inline-block'; xhr.send(query); } @@ -576,7 +589,7 @@ stats.innerText = `Elapsed: ${seconds} sec, read ${formatted_rows} rows, ${formatted_bytes}.`; /// We can also render graphs if user performed EXPLAIN PIPELINE graph=1 or EXPLAIN AST graph = 1 - if (response.data.length > 3 && response.data[0][0].startsWith("digraph") && document.getElementById('query').value.match(/^\s*EXPLAIN/i)) { + if (response.data.length > 3 && document.getElementById('query').value.match(/^\s*EXPLAIN/i) && typeof(response.data[0][0]) === "string" && response.data[0][0].startsWith("digraph")) { renderGraph(response); } else { renderTable(response); diff --git a/programs/server/users.xml b/programs/server/users.xml index fd5fe414579..f18562071d8 100644 --- a/programs/server/users.xml +++ b/programs/server/users.xml @@ -79,9 +79,9 @@ Each element of list has one of the following forms: IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0 2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::. - Hostname. Example: server01.yandex.ru. + Hostname. Example: server01.clickhouse.com. To check access, DNS query is performed, and all received addresses compared to peer address. - Regular expression for host names. Example, ^server\d\d-\d\d-\d\.yandex\.ru$ + Regular expression for host names. Example, ^server\d\d-\d\d-\d\.clickhouse\.com$ To check access, DNS PTR query is performed for peer address and then regexp is applied. Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address. Strongly recommended that regexp is ends with $ diff --git a/programs/server/users.yaml.example b/programs/server/users.yaml.example index 76aee04c19b..ddd0ca4466a 100644 --- a/programs/server/users.yaml.example +++ b/programs/server/users.yaml.example @@ -70,9 +70,9 @@ users: # Each element of list has one of the following forms: # ip: IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0 # 2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::. - # host: Hostname. Example: server01.yandex.ru. + # host: Hostname. Example: server01.clickhouse.com. # To check access, DNS query is performed, and all received addresses compared to peer address. - # host_regexp: Regular expression for host names. Example, ^server\d\d-\d\d-\d\.yandex\.ru$ + # host_regexp: Regular expression for host names. Example, ^server\d\d-\d\d-\d\.clickhouse\.com$ # To check access, DNS PTR query is performed for peer address and then regexp is applied. # Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address. 
# Strongly recommended that regexp is ends with $ and take all expression in '' diff --git a/release b/release deleted file mode 100755 index 3eb5591fe2c..00000000000 --- a/release +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env bash - -# If you have "no space left" error, you can change the location of temporary files with BUILDPLACE environment variable. - -# Version increment: -# Default release: 18.1.2 -> 18.2.0: -# ./release --version -# or -# ./release --version minor -# Bugfix release (only with small patches to previous release): 18.1.2 -> 18.1.3: -# ./release --version patch -# Do this once per year: 18.1.2 -> 19.0.0: -# ./release --version major - -set -e - -# Avoid dependency on locale -LC_ALL=C - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -cd $CUR_DIR - -source "./utils/release/release_lib.sh" - -DEBUILD_NOSIGN_OPTIONS="-us -uc" -DEBUILD_NODEPS_OPTIONS="-d" - -if [ -z "$VERSION_STRING" ] ; then - get_revision_author -fi - -while [[ $1 == --* ]] -do - if [[ $1 == '--test' ]]; then - TEST='yes' - VERSION_POSTFIX+=+test - shift - elif [[ $1 == '--check-build-dependencies' ]]; then - DEBUILD_NODEPS_OPTIONS="" - shift - elif [[ $1 == '--version' ]]; then - gen_revision_author $2 - exit 0 - elif [[ $1 == '--rpm' ]]; then - MAKE_RPM=1 - shift - elif [[ $1 == '--tgz' ]]; then - MAKE_TGZ=1 - shift - else - echo "Unknown option $1" - exit 2 - fi -done - -# Build options -if [ -n "$SANITIZER" ] -then - if [[ "$SANITIZER" == "address" ]]; then VERSION_POSTFIX+="+asan" - elif [[ "$SANITIZER" == "thread" ]]; then VERSION_POSTFIX+="+tsan" - elif [[ "$SANITIZER" == "memory" ]]; then VERSION_POSTFIX+="+msan" - elif [[ "$SANITIZER" == "undefined" ]]; then VERSION_POSTFIX+="+ubsan" - else - echo "Unknown value of SANITIZER variable: $SANITIZER" - exit 3 - fi - - export DEB_CC=${DEB_CC=clang-10} - export DEB_CXX=${DEB_CXX=clang++-10} - EXTRAPACKAGES="$EXTRAPACKAGES clang-10 lld-10" -elif [[ $BUILD_TYPE == 'debug' ]]; then - CMAKE_BUILD_TYPE=Debug - VERSION_POSTFIX+="+debug" -fi - -CMAKE_FLAGS=" $MALLOC_OPTS -DSANITIZE=$SANITIZER -DENABLE_CHECK_HEAVY_BUILDS=1 $CMAKE_FLAGS" -[[ -n "$CMAKE_BUILD_TYPE" ]] && CMAKE_FLAGS=" -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE $CMAKE_FLAGS" - -export CMAKE_FLAGS -export EXTRAPACKAGES - -VERSION_STRING+=$VERSION_POSTFIX -echo -e "\nCurrent version is $VERSION_STRING" - -if [ -z "$NO_BUILD" ] ; then - gen_changelog "$VERSION_STRING" "" "$AUTHOR" "" - # Build (only binary packages). 
- debuild --preserve-env -e PATH \ - -e DEB_CC=$DEB_CC -e DEB_CXX=$DEB_CXX -e CMAKE_FLAGS="$CMAKE_FLAGS" \ - -b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS} ${DEB_ARCH_FLAG} -fi - -if [ -n "$MAKE_RPM" ]; then - make_rpm -fi - -if [ -n "$MAKE_TGZ" ]; then - make_tgz -fi diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index ef8eccb85fa..91ffd7f04ab 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -173,7 +173,8 @@ void AccessControl::addUsersConfigStorage(const String & storage_name_, const Po auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); }; auto is_no_password_allowed_function = [this]() -> bool { return isNoPasswordAllowed(); }; auto is_plaintext_password_allowed_function = [this]() -> bool { return isPlaintextPasswordAllowed(); }; - auto new_storage = std::make_shared(storage_name_, check_setting_name_function,is_no_password_allowed_function,is_plaintext_password_allowed_function); + auto new_storage = std::make_shared(storage_name_, check_setting_name_function, + is_no_password_allowed_function, is_plaintext_password_allowed_function); new_storage->setConfig(users_config_); addStorage(new_storage); LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", @@ -209,7 +210,8 @@ void AccessControl::addUsersConfigStorage( auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); }; auto is_no_password_allowed_function = [this]() -> bool { return isNoPasswordAllowed(); }; auto is_plaintext_password_allowed_function = [this]() -> bool { return isPlaintextPasswordAllowed(); }; - auto new_storage = std::make_shared(storage_name_, check_setting_name_function,is_no_password_allowed_function,is_plaintext_password_allowed_function); + auto new_storage = std::make_shared(storage_name_, check_setting_name_function, + is_no_password_allowed_function, is_plaintext_password_allowed_function); new_storage->load(users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_); addStorage(new_storage); LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath()); @@ -411,7 +413,8 @@ UUID AccessControl::authenticate(const Credentials & credentials, const Poco::Ne { try { - return MultipleAccessStorage::authenticate(credentials, address, *external_authenticators,allow_no_password, allow_plaintext_password); + return MultipleAccessStorage::authenticate(credentials, address, *external_authenticators, allow_no_password, + allow_plaintext_password); } catch (...) 
{ @@ -447,26 +450,38 @@ void AccessControl::setCustomSettingsPrefixes(const String & comma_separated_pre setCustomSettingsPrefixes(prefixes); } -void AccessControl::setPlaintextPasswordSetting(bool allow_plaintext_password_) -{ - allow_plaintext_password = allow_plaintext_password_; -} -void AccessControl::setNoPasswordSetting(bool allow_no_password_) -{ - allow_no_password = allow_no_password_; -} - -bool AccessControl::isSettingNameAllowed(const std::string_view & setting_name) const +bool AccessControl::isSettingNameAllowed(const std::string_view setting_name) const { return custom_settings_prefixes->isSettingNameAllowed(setting_name); } -void AccessControl::checkSettingNameIsAllowed(const std::string_view & setting_name) const +void AccessControl::checkSettingNameIsAllowed(const std::string_view setting_name) const { custom_settings_prefixes->checkSettingNameIsAllowed(setting_name); } +void AccessControl::setNoPasswordAllowed(bool allow_no_password_) +{ + allow_no_password = allow_no_password_; +} + +bool AccessControl::isNoPasswordAllowed() const +{ + return allow_no_password; +} + +void AccessControl::setPlaintextPasswordAllowed(bool allow_plaintext_password_) +{ + allow_plaintext_password = allow_plaintext_password_; +} + +bool AccessControl::isPlaintextPasswordAllowed() const +{ + return allow_plaintext_password; +} + + std::shared_ptr AccessControl::getContextAccess( const UUID & user_id, const std::vector & current_roles, @@ -550,15 +565,6 @@ std::vector AccessControl::getAllQuotasUsage() const return quota_cache->getAllQuotasUsage(); } -bool AccessControl::isPlaintextPasswordAllowed() const -{ - return allow_plaintext_password; -} - -bool AccessControl::isNoPasswordAllowed() const -{ - return allow_no_password; -} std::shared_ptr AccessControl::getEnabledSettings( const UUID & user_id, diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index 14f4dae9424..0ac3d9cb0c2 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -49,8 +49,6 @@ class AccessControl : public MultipleAccessStorage public: AccessControl(); ~AccessControl() override; - std::atomic_bool allow_plaintext_password; - std::atomic_bool allow_no_password; /// Parses access entities from a configuration loaded from users.xml. /// This function add UsersConfigAccessStorage if it wasn't added before. @@ -113,12 +111,16 @@ public: /// This function also enables custom prefixes to be used. void setCustomSettingsPrefixes(const Strings & prefixes); void setCustomSettingsPrefixes(const String & comma_separated_prefixes); - bool isSettingNameAllowed(const std::string_view & name) const; - void checkSettingNameIsAllowed(const std::string_view & name) const; + bool isSettingNameAllowed(const std::string_view name) const; + void checkSettingNameIsAllowed(const std::string_view name) const; - //sets allow_plaintext_password and allow_no_password setting - void setPlaintextPasswordSetting(const bool allow_plaintext_password_); - void setNoPasswordSetting(const bool allow_no_password_); + /// Allows users without password (by default it's allowed). + void setNoPasswordAllowed(const bool allow_no_password_); + bool isNoPasswordAllowed() const; + + /// Allows users with plaintext password (by default it's allowed). 
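
Note on the AccessControl changes above: the old setPlaintextPasswordSetting/setNoPasswordSetting pair is replaced by setNoPasswordAllowed/setPlaintextPasswordAllowed, backed by private std::atomic_bool members that default to true. A minimal sketch of that accessor pattern (the class name PasswordPolicy is hypothetical): a configuration-reload thread can flip the flags while authentication code on other threads reads them without taking a lock.

#include <atomic>

class PasswordPolicy
{
public:
    void setNoPasswordAllowed(bool allow_no_password_) { allow_no_password = allow_no_password_; }
    bool isNoPasswordAllowed() const { return allow_no_password; }

    void setPlaintextPasswordAllowed(bool allow_plaintext_password_) { allow_plaintext_password = allow_plaintext_password_; }
    bool isPlaintextPasswordAllowed() const { return allow_plaintext_password; }

private:
    /// Defaults mirror the server defaults: both authentication types are allowed.
    std::atomic_bool allow_no_password = true;
    std::atomic_bool allow_plaintext_password = true;
};
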
+ void setPlaintextPasswordAllowed(const bool allow_plaintext_password_); + bool isPlaintextPasswordAllowed() const; UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const; void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config); @@ -153,9 +155,6 @@ public: std::vector getAllQuotasUsage() const; - bool isPlaintextPasswordAllowed() const; - bool isNoPasswordAllowed() const; - std::shared_ptr getEnabledSettings( const UUID & user_id, const SettingsProfileElements & settings_from_user, @@ -177,6 +176,8 @@ private: std::unique_ptr settings_profiles_cache; std::unique_ptr external_authenticators; std::unique_ptr custom_settings_prefixes; + std::atomic_bool allow_plaintext_password = true; + std::atomic_bool allow_no_password = true; }; } diff --git a/src/Access/AccessEntityIO.cpp b/src/Access/AccessEntityIO.cpp index acf2a972b13..9d229bbc43b 100644 --- a/src/Access/AccessEntityIO.cpp +++ b/src/Access/AccessEntityIO.cpp @@ -120,7 +120,7 @@ AccessEntityPtr deserializeAccessEntityImpl(const String & definition) if (res) throw Exception("Two access entities attached in the same file", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); res = user = std::make_unique(); - InterpreterCreateUserQuery::updateUserFromQuery(*user, *create_user_query); + InterpreterCreateUserQuery::updateUserFromQuery(*user, *create_user_query, /* allow_no_password = */ true, /* allow_plaintext_password = */ true); } else if (auto * create_role_query = query->as()) { diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 0b69bd5fd0e..accfa0ad33d 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -102,6 +102,7 @@ enum class AccessType \ M(KILL_QUERY, "", GLOBAL, ALL) /* allows to kill a query started by another user (anyone can kill his own queries) */\ + M(KILL_TRANSACTION, "", GLOBAL, ALL) \ \ M(MOVE_PARTITION_BETWEEN_SHARDS, "", GLOBAL, ALL) /* required to be able to move a part/partition to a table identified by its ZooKeeper path */\ @@ -182,6 +183,7 @@ enum class AccessType M(JDBC, "", GLOBAL, SOURCES) \ M(HDFS, "", GLOBAL, SOURCES) \ M(S3, "", GLOBAL, SOURCES) \ + M(HIVE, "", GLOBAL, SOURCES) \ M(SOURCES, "", GROUP, ALL) \ \ M(ALL, "ALL PRIVILEGES", GROUP, NONE) /* full access */ \ diff --git a/src/Access/Common/QuotaDefs.cpp b/src/Access/Common/QuotaDefs.cpp index 5d36a0bdd01..f9f8a56d534 100644 --- a/src/Access/Common/QuotaDefs.cpp +++ b/src/Access/Common/QuotaDefs.cpp @@ -107,6 +107,11 @@ const QuotaTypeInfo & QuotaTypeInfo::get(QuotaType type) static const auto info = make_info("EXECUTION_TIME", 1000000000 /* execution_time is stored in nanoseconds */); return info; } + case QuotaType::WRITTEN_BYTES: + { + static const auto info = make_info("WRITTEN_BYTES", 1); + return info; + } case QuotaType::MAX: break; } throw Exception("Unexpected quota type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); diff --git a/src/Access/Common/QuotaDefs.h b/src/Access/Common/QuotaDefs.h index cfd8a07d9ff..dfe2b56ef31 100644 --- a/src/Access/Common/QuotaDefs.h +++ b/src/Access/Common/QuotaDefs.h @@ -13,13 +13,14 @@ enum class QuotaType { QUERIES, /// Number of queries. QUERY_SELECTS, /// Number of select queries. - QUERY_INSERTS, /// Number of inserts queries. + QUERY_INSERTS, /// Number of insert queries. ERRORS, /// Number of queries with exceptions. RESULT_ROWS, /// Number of rows returned as result. RESULT_BYTES, /// Number of bytes returned as result. 
READ_ROWS, /// Number of rows read from tables. READ_BYTES, /// Number of bytes read from tables. EXECUTION_TIME, /// Total amount of query execution time in nanoseconds. + WRITTEN_BYTES, /// Number of bytes written to tables. MAX }; diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 7393fcd8d36..a9eb27c291c 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -327,7 +327,8 @@ void DiskAccessStorage::scheduleWriteLists(AccessEntityType type) /// Create the 'need_rebuild_lists.mark' file. /// This file will be used later to find out if writing lists is successful or not. - std::ofstream{getNeedRebuildListsMarkFilePath(directory_path)}; + std::ofstream out{getNeedRebuildListsMarkFilePath(directory_path)}; + out.close(); lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this}; lists_writing_thread_is_waiting = true; diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp index 78dd3c7022a..f2354a3837c 100644 --- a/src/Access/EnabledQuota.cpp +++ b/src/Access/EnabledQuota.cpp @@ -13,7 +13,7 @@ namespace DB { namespace ErrorCodes { - extern const int QUOTA_EXPIRED; + extern const int QUOTA_EXCEEDED; } @@ -33,7 +33,7 @@ struct EnabledQuota::Impl "Quota for user " + backQuote(user_name) + " for " + to_string(duration) + " has been exceeded: " + type_info.valueToStringWithName(used) + "/" + type_info.valueToString(max) + ". " + "Interval will end at " + to_string(end_of_interval) + ". " + "Name of quota template: " + backQuote(quota_name), - ErrorCodes::QUOTA_EXPIRED); + ErrorCodes::QUOTA_EXCEEDED); } diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 33bef719eff..8c53216c638 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -441,7 +441,9 @@ void IAccessStorage::notify(const Notifications & notifications) UUID IAccessStorage::authenticate( const Credentials & credentials, const Poco::Net::IPAddress & address, - const ExternalAuthenticators & external_authenticators, bool allow_no_password, bool allow_plaintext_password) const + const ExternalAuthenticators & external_authenticators, + bool allow_no_password, + bool allow_plaintext_password) const { return *authenticateImpl(credentials, address, external_authenticators, /* throw_if_user_not_exists = */ true, allow_no_password, allow_plaintext_password); } @@ -451,7 +453,9 @@ std::optional IAccessStorage::authenticate( const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, - bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const + bool throw_if_user_not_exists, + bool allow_no_password, + bool allow_plaintext_password) const { return authenticateImpl(credentials, address, external_authenticators, throw_if_user_not_exists, allow_no_password, allow_plaintext_password); } @@ -461,7 +465,9 @@ std::optional IAccessStorage::authenticateImpl( const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, - bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const + bool throw_if_user_not_exists, + bool allow_no_password, + bool allow_plaintext_password) const { if (auto id = find(credentials.getUserName())) { @@ -469,8 +475,11 @@ std::optional IAccessStorage::authenticateImpl( { if (!isAddressAllowed(*user, address)) throwAddressNotAllowed(address); - if 
(isNoPasswordAllowed(*user, allow_no_password) || isPlaintextPasswordAllowed(*user, allow_plaintext_password)) - throwPasswordTypeNotAllowed(); + + auto auth_type = user->auth_data.getType(); + if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || + ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) + throwAuthenticationTypeNotAllowed(auth_type); if (!areCredentialsValid(*user, credentials, external_authenticators)) throwInvalidCredentials(); @@ -506,15 +515,6 @@ bool IAccessStorage::isAddressAllowed(const User & user, const Poco::Net::IPAddr return user.allowed_client_hosts.contains(address); } -bool IAccessStorage::isPlaintextPasswordAllowed(const User & user, bool allow_plaintext_password) -{ - return !allow_plaintext_password && user.auth_data.getType() == AuthenticationType::PLAINTEXT_PASSWORD; -} - -bool IAccessStorage::isNoPasswordAllowed(const User & user, bool allow_no_password) -{ - return !allow_no_password && user.auth_data.getType() == AuthenticationType::NO_PASSWORD; -} UUID IAccessStorage::generateRandomID() { @@ -610,11 +610,12 @@ void IAccessStorage::throwAddressNotAllowed(const Poco::Net::IPAddress & address throw Exception("Connections from " + address.toString() + " are not allowed", ErrorCodes::IP_ADDRESS_NOT_ALLOWED); } -void IAccessStorage::throwPasswordTypeNotAllowed() +void IAccessStorage::throwAuthenticationTypeNotAllowed(AuthenticationType auth_type) { throw Exception( - "Authentication denied for users configured with AuthType PLAINTEXT_PASSWORD and NO_PASSWORD. Please check with Clickhouse admin to allow allow PLAINTEXT_PASSWORD and NO_PASSWORD through server configuration ", - ErrorCodes::AUTHENTICATION_FAILED); + ErrorCodes::AUTHENTICATION_FAILED, + "Authentication type {} is not allowed, check the setting allow_{} in the server configuration", + toString(auth_type), AuthenticationTypeInfo::get(auth_type).name); } void IAccessStorage::throwInvalidCredentials() { diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index 3069e41b285..428a0e8f052 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -18,6 +18,7 @@ namespace DB struct User; class Credentials; class ExternalAuthenticators; +enum class AuthenticationType; /// Contains entities, i.e. instances of classes derived from IAccessEntity. /// The implementations of this class MUST be thread-safe. @@ -148,7 +149,7 @@ public: /// Finds a user, check the provided credentials and returns the ID of the user if they are valid. /// Throws an exception if no such user or credentials are invalid. 
- UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool allow_no_password=true, bool allow_plaintext_password=true) const; + UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool allow_no_password, bool allow_plaintext_password) const; std::optional authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const; protected: @@ -164,8 +165,6 @@ protected: virtual std::optional authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const; virtual bool areCredentialsValid(const User & user, const Credentials & credentials, const ExternalAuthenticators & external_authenticators) const; virtual bool isAddressAllowed(const User & user, const Poco::Net::IPAddress & address) const; - static bool isPlaintextPasswordAllowed(const User & user, bool allow_plaintext_password) ; - static bool isNoPasswordAllowed(const User & user, bool allow_no_password); static UUID generateRandomID(); Poco::Logger * getLogger() const; static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); } @@ -181,7 +180,7 @@ protected: [[noreturn]] void throwReadonlyCannotRemove(AccessEntityType type, const String & name) const; [[noreturn]] static void throwAddressNotAllowed(const Poco::Net::IPAddress & address); [[noreturn]] static void throwInvalidCredentials(); - [[noreturn]] static void throwPasswordTypeNotAllowed(); + [[noreturn]] static void throwAuthenticationTypeNotAllowed(AuthenticationType auth_type); using Notification = std::tuple; using Notifications = std::vector; static void notify(const Notifications & notifications); diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index dd1c50343f2..4cf42a5017c 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -481,7 +481,9 @@ std::optional LDAPAccessStorage::authenticateImpl( const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, - bool throw_if_user_not_exists,bool allow_no_password __attribute__((unused)), bool allow_plaintext_password __attribute__((unused))) const + bool throw_if_user_not_exists, + bool /* allow_no_password */, + bool /* allow_plaintext_password */) const { std::scoped_lock lock(mutex); auto id = memory_storage.find(credentials.getUserName()); diff --git a/src/Access/MultipleAccessStorage.cpp b/src/Access/MultipleAccessStorage.cpp index c988a4d374a..359214eac9f 100644 --- a/src/Access/MultipleAccessStorage.cpp +++ b/src/Access/MultipleAccessStorage.cpp @@ -449,14 +449,20 @@ void MultipleAccessStorage::updateSubscriptionsToNestedStorages(std::unique_lock } -std::optional MultipleAccessStorage::authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists,bool allow_no_password, bool allow_plaintext_password) const +std::optional +MultipleAccessStorage::authenticateImpl(const Credentials 
& credentials, const Poco::Net::IPAddress & address, + const ExternalAuthenticators & external_authenticators, + bool throw_if_user_not_exists, + bool allow_no_password, bool allow_plaintext_password) const { auto storages = getStoragesInternal(); for (size_t i = 0; i != storages->size(); ++i) { const auto & storage = (*storages)[i]; bool is_last_storage = (i == storages->size() - 1); - auto id = storage->authenticate(credentials, address, external_authenticators, (throw_if_user_not_exists && is_last_storage), allow_no_password, allow_plaintext_password); + auto id = storage->authenticate(credentials, address, external_authenticators, + (throw_if_user_not_exists && is_last_storage), + allow_no_password, allow_plaintext_password); if (id) { std::lock_guard lock{mutex}; diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index b2bdebfcf6c..fe8e6d1d6c0 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -28,8 +28,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int UNKNOWN_ADDRESS_PATTERN_TYPE; extern const int NOT_IMPLEMENTED; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - } namespace @@ -50,7 +48,7 @@ namespace UUID generateID(const IAccessEntity & entity) { return generateID(entity.getType(), entity.getName()); } - UserPtr parseUser(const Poco::Util::AbstractConfiguration & config, const String & user_name) + UserPtr parseUser(const Poco::Util::AbstractConfiguration & config, const String & user_name, bool allow_no_password, bool allow_plaintext_password) { auto user = std::make_shared(); user->setName(user_name); @@ -130,6 +128,15 @@ namespace user->auth_data.setSSLCertificateCommonNames(std::move(common_names)); } + auto auth_type = user->auth_data.getType(); + if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || + ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Authentication type {} is not allowed, check the setting allow_{} in the server configuration", + toString(auth_type), AuthenticationTypeInfo::get(auth_type).name); + } + const auto profile_name_config = user_config + ".profile"; if (config.has(profile_name_config)) { @@ -225,24 +232,18 @@ namespace } - std::vector parseUsers(const Poco::Util::AbstractConfiguration & config, Fn auto && is_no_password_allowed_function, Fn auto && is_plaintext_password_allowed_function) + std::vector parseUsers(const Poco::Util::AbstractConfiguration & config, bool allow_no_password, bool allow_plaintext_password) { Poco::Util::AbstractConfiguration::Keys user_names; config.keys("users", user_names); std::vector users; users.reserve(user_names.size()); - bool allow_plaintext_password = is_plaintext_password_allowed_function(); - bool allow_no_password = is_no_password_allowed_function(); for (const auto & user_name : user_names) { try { - String user_config = "users." + user_name; - if ((config.has(user_config + ".password") && !allow_plaintext_password) || (config.has(user_config + ".no_password") && !allow_no_password)) - throw Exception("Incorrect User configuration. User is not allowed to configure PLAINTEXT_PASSWORD or NO_PASSWORD. 
Please configure User with authtype SHA256_PASSWORD_HASH, SHA256_PASSWORD, DOUBLE_SHA1_PASSWORD OR enable setting allow_plaintext_and_no_password in server configuration to configure user with plaintext and no password Auth_Type" - " Though it is not recommended to use plaintext_password and No_password for user authentication.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - users.push_back(parseUser(config, user_name)); + users.push_back(parseUser(config, user_name, allow_no_password, allow_plaintext_password)); } catch (Exception & e) { @@ -562,8 +563,10 @@ void UsersConfigAccessStorage::parseFromConfig(const Poco::Util::AbstractConfigu { try { + bool no_password_allowed = is_no_password_allowed_function(); + bool plaintext_password_allowed = is_plaintext_password_allowed_function(); std::vector> all_entities; - for (const auto & entity : parseUsers(config,is_no_password_allowed_function, is_plaintext_password_allowed_function)) + for (const auto & entity : parseUsers(config, no_password_allowed, plaintext_password_allowed)) all_entities.emplace_back(generateID(*entity), entity); for (const auto & entity : parseQuotas(config)) all_entities.emplace_back(generateID(*entity), entity); diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp index ab6fdc8fd7e..4d7901a7fac 100644 --- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp +++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp @@ -39,7 +39,7 @@ bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept } template -static IAggregateFunction * create(const IDataType & second_type, TArgs && ... args) +IAggregateFunction * create(const IDataType & second_type, TArgs && ... args) { const WhichDataType which(second_type); @@ -51,7 +51,7 @@ static IAggregateFunction * create(const IDataType & second_type, TArgs && ... a // Not using helper functions because there are no templates for binary decimal/numeric function. template -static IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args) +IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... 
args) { const WhichDataType which(first_type); diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp index f1c6e7c6112..3b43d9a85f8 100644 --- a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp +++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp @@ -30,7 +30,7 @@ AggregateFunctionPtr createAggregateFunctionDeltaSum( throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - DataTypePtr data_type = arguments[0]; + const DataTypePtr & data_type = arguments[0]; if (isInteger(data_type) || isFloat(data_type)) return AggregateFunctionPtr(createWithNumericType( diff --git a/src/AggregateFunctions/AggregateFunctionFactory.h b/src/AggregateFunctions/AggregateFunctionFactory.h index ef5740733df..e5263a54d79 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.h +++ b/src/AggregateFunctions/AggregateFunctionFactory.h @@ -38,7 +38,8 @@ struct AggregateFunctionWithProperties AggregateFunctionWithProperties(const AggregateFunctionWithProperties &) = default; AggregateFunctionWithProperties & operator = (const AggregateFunctionWithProperties &) = default; - template > * = nullptr> + template + requires (!std::is_same_v) AggregateFunctionWithProperties(Creator creator_, AggregateFunctionProperties properties_ = {}) /// NOLINT : creator(std::forward(creator_)), properties(std::move(properties_)) { diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 5a9fd778277..85075d5a4d6 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -20,7 +20,7 @@ namespace { template