diff --git a/.clang-tidy b/.clang-tidy index 0400b500e5c..ca84a4834e5 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -210,3 +210,6 @@ CheckOptions: value: false - key: performance-move-const-arg.CheckTriviallyCopyableMove value: false + # Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097 + - key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp + value: expr-type diff --git a/.github/ISSUE_TEMPLATE/10_question.md b/.github/ISSUE_TEMPLATE/10_question.md index a112b9599d5..5b3d00a3180 100644 --- a/.github/ISSUE_TEMPLATE/10_question.md +++ b/.github/ISSUE_TEMPLATE/10_question.md @@ -7,6 +7,6 @@ assignees: '' --- -> Make sure to check documentation https://clickhouse.yandex/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse +> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse > If you still prefer GitHub issues, remove all this text and ask your question here. diff --git a/.github/ISSUE_TEMPLATE/50_build-issue.md b/.github/ISSUE_TEMPLATE/50_build-issue.md index a358575cd7c..9b05fbbdd13 100644 --- a/.github/ISSUE_TEMPLATE/50_build-issue.md +++ b/.github/ISSUE_TEMPLATE/50_build-issue.md @@ -7,7 +7,7 @@ assignees: '' --- -> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.yandex/docs/en/development/build/ +> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. 
Just in case, official build instructions are published here: https://clickhouse.com/docs/en/development/build/ **Operating system** diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 5816a58081d..081fa165c68 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -149,7 +149,6 @@ jobs: sudo rm -fr "$TEMP_PATH" SplitBuildSmokeTest: needs: [BuilderDebSplitted] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, style-checker] steps: - name: Set envs @@ -316,7 +315,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRelease: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -360,6 +358,51 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinGCC: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + CHECK_NAME=ClickHouse build check (actions) + BUILD_NAME=binary_gcc + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'true' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebAsan: needs: [DockerHubPush] runs-on: [self-hosted, builder] @@ -590,7 +633,6 @@ jobs: ########################################################################################## BuilderDebSplitted: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -636,7 +678,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinTidy: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -682,7 +723,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinDarwin: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -728,7 +768,6 @@ jobs: sudo rm -fr 
"$TEMP_PATH" "$CACHES_PATH" BuilderBinAarch64: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -774,7 +813,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinFreeBSD: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -820,7 +858,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinDarwinAarch64: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -866,7 +903,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinPPC64: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -911,6 +947,34 @@ jobs: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ +##################################### Docker images ####################################### +############################################################################################ + DockerServerImages: + needs: + - BuilderDebRelease + - BuilderDebAarch64 + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no version info + - name: Check docker clickhouse/clickhouse-server building + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_server.py --release-type head + python3 docker_server.py --release-type head --no-ubuntu \ + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" +############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: @@ -918,6 +982,7 @@ jobs: - BuilderDebRelease - BuilderDebAarch64 - BuilderBinRelease + - BuilderBinGCC - BuilderDebAsan - BuilderDebTsan - BuilderDebUBsan @@ -2608,6 +2673,40 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" + UnitTestsReleaseGCC: + needs: [BuilderBinGCC] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/unit_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Unit tests (release-gcc, actions) + REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: 
Check out repository code + uses: actions/checkout@v2 + - name: Unit test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 unit_tests_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index a172947b2fc..5b47f94a324 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -7,6 +7,7 @@ env: "on": schedule: - cron: '13 3 * * *' + workflow_dispatch: jobs: DockerHubPushAarch64: diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 4a3880543c4..c01d1821d0f 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -4,7 +4,7 @@ env: # Force the stdout and stderr streams to be unbuffered PYTHONUNBUFFERED: 1 -on: # yamllint disable-line rule:truthy +on: # yamllint disable-line rule:truthy pull_request: types: - synchronize @@ -370,6 +370,48 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinGCC: + needs: [DockerHubPush, FastTest] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + CHECK_NAME=ClickHouse build check (actions) + BUILD_NAME=binary_gcc + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'true' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderDebAarch64: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -956,6 +998,34 @@ jobs: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ +##################################### Docker images ####################################### +############################################################################################ + DockerServerImages: + needs: + - BuilderDebRelease + - BuilderDebAarch64 + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no version info + - name: Check docker clickhouse/clickhouse-server building + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 
docker_server.py --release-type head --no-push + python3 docker_server.py --release-type head --no-push --no-ubuntu \ + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" +############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: @@ -963,6 +1033,7 @@ jobs: - BuilderDebRelease - BuilderDebAarch64 - BuilderBinRelease + - BuilderBinGCC - BuilderDebAsan - BuilderDebTsan - BuilderDebUBsan @@ -1733,6 +1804,51 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" + TestsBugfixCheck: + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/tests_bugfix_check + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Tests bugfix validate check (actions) + KILL_TIMEOUT=3600 + REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Bugfix test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + + TEMP_PATH="${TEMP_PATH}/integration" \ + REPORTS_PATH="${REPORTS_PATH}/integration" \ + python3 integration_test_check.py "Integration tests bugfix validate check" \ + --validate-bugfix --post-commit-status=file || echo 'ignore exit code' + + TEMP_PATH="${TEMP_PATH}/stateless" \ + REPORTS_PATH="${REPORTS_PATH}/stateless" \ + python3 functional_test_check.py "Stateless tests bugfix validate check" "$KILL_TIMEOUT" \ + --validate-bugfix --post-commit-status=file || echo 'ignore exit code' + + python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/post_commit_status.tsv" "${TEMP_PATH}/integration/post_commit_status.tsv" + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################## ############################ FUNCTIONAl STATEFUL TESTS ####################################### ############################################################################################## @@ -2763,6 +2879,40 @@ jobs: docker kill "$(docker ps -q)" ||: docker rm -f "$(docker ps -a -q)" ||: sudo rm -fr "$TEMP_PATH" + UnitTestsReleaseGCC: + needs: [BuilderBinGCC] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/unit_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Unit tests (release-gcc, actions) + REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Unit test + run: | + sudo rm -fr 
"$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 unit_tests_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -3016,6 +3166,7 @@ jobs: needs: - StyleCheck - DockerHubPush + - DockerServerImages - CheckLabels - BuilderReport - FastTest diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bd62e64409f..29e3d0c4358 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -36,3 +36,28 @@ jobs: overwrite: true tag: ${{ github.ref }} file_glob: true + ############################################################################################ + ##################################### Docker images ####################################### + ############################################################################################ + DockerServerImages: + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no version info + - name: Check docker clickhouse/clickhouse-server building + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_server.py --release-type auto + python3 docker_server.py --release-type auto --no-ubuntu \ + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper + - name: Cleanup + if: always() + run: | + docker kill "$(docker ps -q)" ||: + docker rm -f "$(docker ps -a -q)" ||: + sudo rm -fr "$TEMP_PATH" diff --git a/CHANGELOG.md b/CHANGELOG.md index 61724ab2d0c..100b03ab92b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)). * Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)). +* Make function `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave same as `toIPv4`, `toIPv6` functions. Changed behavior of incorrect IP address passed into functions `toIPv4`,` toIPv6`, now if invalid IP address passes into this functions exception will be raised, before this function return default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringOrNull` `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. 
The functions `IPv4StringToNumOrDefault`, `toIPv4OrDefault`, and `toIPv6OrDefault` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning a default value for an invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`; if this setting is enabled, the IP address conversion functions behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)). #### New Feature @@ -366,7 +367,7 @@ #### Improvement -* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch. +* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch. * Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Improve `Bool` type serialization and deserialization, check the range of values. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)). * If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)). diff --git a/CMakeLists.txt b/CMakeLists.txt index 9649fc32d74..a9ce64b87ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -261,13 +261,16 @@ endif () # Add a section with the hash of the compiled machine code for integrity checks. # Only for official builds, because adding a section can be time consuming (rewrite of several GB).
# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary) -if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE)) - set (USE_BINARY_HASH 1) +if (OBJCOPY_PATH AND CLICKHOUSE_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE OR CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64.cmake$")) + set (USE_BINARY_HASH 1 CACHE STRING "Calculate binary hash and store it in the separate section") endif () # Allows to build stripped binary in a separate directory -if (OBJCOPY_PATH AND READELF_PATH) - set(BUILD_STRIPPED_BINARIES_PREFIX "" CACHE STRING "Build stripped binaries with debug info in separate directory") +if (OBJCOPY_PATH AND STRIP_PATH) + option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF) + if (INSTALL_STRIPPED_BINARIES) + set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information") + endif() endif() cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 8a1ca6064cb..3cfd2f6906a 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -2,6 +2,7 @@ set (SRCS argsToConfig.cpp coverage.cpp demangle.cpp + getAvailableMemoryAmount.cpp getFQDNOrHostName.cpp getMemoryAmount.cpp getPageSize.cpp diff --git a/base/base/getAvailableMemoryAmount.cpp b/base/base/getAvailableMemoryAmount.cpp new file mode 100644 index 00000000000..d2f794e8952 --- /dev/null +++ b/base/base/getAvailableMemoryAmount.cpp @@ -0,0 +1,44 @@ +#include <stdexcept> +#include <cstdint> +#include <base/getAvailableMemoryAmount.h> +#include <base/getPageSize.h> + +#include <unistd.h> +#include <sys/types.h> +#include <sys/param.h> +#if defined(BSD) +#include <sys/sysctl.h> +#include <sys/vmmeter.h> +#endif + + +uint64_t getAvailableMemoryAmountOrZero() +{ +#if defined(_SC_AVPHYS_PAGES) // linux + return getPageSize() * sysconf(_SC_AVPHYS_PAGES); +#elif defined(__FreeBSD__) + struct vmtotal vmt; + size_t vmt_size = sizeof(vmt); + if (sysctlbyname("vm.vmtotal", &vmt, &vmt_size, NULL, 0) == 0) + return getPageSize() * vmt.t_avm; + else + return 0; +#else // darwin + unsigned int usermem; + size_t len = sizeof(usermem); + static int mib[2] = { CTL_HW, HW_USERMEM }; + if (sysctl(mib, 2, &usermem, &len, nullptr, 0) == 0 && len == sizeof(usermem)) + return usermem; + else + return 0; +#endif +} + + +uint64_t getAvailableMemoryAmount() +{ + auto res = getAvailableMemoryAmountOrZero(); + if (!res) + throw std::runtime_error("Cannot determine available memory amount"); + return res; +} diff --git a/base/base/getAvailableMemoryAmount.h b/base/base/getAvailableMemoryAmount.h new file mode 100644 index 00000000000..44612945016 --- /dev/null +++ b/base/base/getAvailableMemoryAmount.h @@ -0,0 +1,12 @@ +#pragma once + +#include <cstdint> + +/** Returns the size of currently available physical memory (RAM) in bytes. + * Returns 0 on unsupported platform or if it cannot determine the size of physical memory. + */ +uint64_t getAvailableMemoryAmountOrZero(); + +/** Throws exception if it cannot determine the size of physical memory.
+ */ +uint64_t getAvailableMemoryAmount(); diff --git a/base/glibc-compatibility/CMakeLists.txt b/base/glibc-compatibility/CMakeLists.txt index ddec09121e1..ef7ec6d7fc0 100644 --- a/base/glibc-compatibility/CMakeLists.txt +++ b/base/glibc-compatibility/CMakeLists.txt @@ -51,6 +51,6 @@ if (GLIBC_COMPATIBILITY) message (STATUS "Some symbols from glibc will be replaced for compatibility") -elseif (YANDEX_OFFICIAL_BUILD) +elseif (CLICKHOUSE_OFFICIAL_BUILD) message (WARNING "Option GLIBC_COMPATIBILITY must be turned on for production builds.") endif () diff --git a/benchmark/greenplum/result_parser.py b/benchmark/greenplum/result_parser.py index 8af20d265a0..4ed1aa5c4a5 100755 --- a/benchmark/greenplum/result_parser.py +++ b/benchmark/greenplum/result_parser.py @@ -4,11 +4,12 @@ import sys import json + def parse_block(block=[], options=[]): - #print('block is here', block) - #show_query = False - #show_query = options.show_query + # print('block is here', block) + # show_query = False + # show_query = options.show_query result = [] query = block[0].strip() if len(block) > 4: @@ -20,9 +21,9 @@ def parse_block(block=[], options=[]): timing2 = block[2].strip().split()[1] timing3 = block[3].strip().split()[1] if options.show_queries: - result.append( query ) + result.append(query) if not options.show_first_timings: - result += [ timing1 , timing2, timing3 ] + result += [timing1, timing2, timing3] else: result.append(timing1) return result @@ -37,12 +38,12 @@ def read_stats_file(options, fname): for line in f.readlines(): - if 'SELECT' in line: + if "SELECT" in line: if len(block) > 1: - result.append( parse_block(block, options) ) - block = [ line ] - elif 'Time:' in line: - block.append( line ) + result.append(parse_block(block, options)) + block = [line] + elif "Time:" in line: + block.append(line) return result @@ -50,7 +51,7 @@ def read_stats_file(options, fname): def compare_stats_files(options, arguments): result = [] file_output = [] - pyplot_colors = ['y', 'b', 'g', 'r'] + pyplot_colors = ["y", "b", "g", "r"] for fname in arguments[1:]: file_output.append((read_stats_file(options, fname))) if len(file_output[0]) > 0: @@ -58,65 +59,92 @@ def compare_stats_files(options, arguments): for idx, data_set in enumerate(file_output): int_result = [] for timing in data_set: - int_result.append(float(timing[0])) #y values - result.append([[x for x in range(0, len(int_result)) ], int_result, -pyplot_colors[idx] + '^' ] ) -# result.append([x for x in range(1, len(int_result)) ]) #x values -# result.append( pyplot_colors[idx] + '^' ) + int_result.append(float(timing[0])) # y values + result.append( + [ + [x for x in range(0, len(int_result))], + int_result, + pyplot_colors[idx] + "^", + ] + ) + # result.append([x for x in range(1, len(int_result)) ]) #x values + # result.append( pyplot_colors[idx] + '^' ) return result + def parse_args(): from optparse import OptionParser - parser = OptionParser(usage='usage: %prog [options] [result_file_path]..') - parser.add_option("-q", "--show-queries", help="Show statements along with timings", action="store_true", dest="show_queries") - parser.add_option("-f", "--show-first-timings", help="Show only first tries timings", action="store_true", dest="show_first_timings") - parser.add_option("-c", "--compare-mode", help="Prepare output for pyplot comparing result files.", action="store", dest="compare_mode") + + parser = OptionParser(usage="usage: %prog [options] [result_file_path]..") + parser.add_option( + "-q", + "--show-queries", + help="Show statements along with 
timings", + action="store_true", + dest="show_queries", + ) + parser.add_option( + "-f", + "--show-first-timings", + help="Show only first tries timings", + action="store_true", + dest="show_first_timings", + ) + parser.add_option( + "-c", + "--compare-mode", + help="Prepare output for pyplot comparing result files.", + action="store", + dest="compare_mode", + ) (options, arguments) = parser.parse_args(sys.argv) if len(arguments) < 2: parser.print_usage() sys.exit(1) - return ( options, arguments ) + return (options, arguments) + def gen_pyplot_code(options, arguments): - result = '' + result = "" data_sets = compare_stats_files(options, arguments) for idx, data_set in enumerate(data_sets, start=0): x_values, y_values, line_style = data_set - result += '\nplt.plot(' - result += '%s, %s, \'%s\'' % ( x_values, y_values, line_style ) - result += ', label=\'%s try\')' % idx - print('import matplotlib.pyplot as plt') + result += "\nplt.plot(" + result += "%s, %s, '%s'" % (x_values, y_values, line_style) + result += ", label='%s try')" % idx + print("import matplotlib.pyplot as plt") print(result) - print( 'plt.xlabel(\'Try number\')' ) - print( 'plt.ylabel(\'Timing\')' ) - print( 'plt.title(\'Benchmark query timings\')' ) - print('plt.legend()') - print('plt.show()') + print("plt.xlabel('Try number')") + print("plt.ylabel('Timing')") + print("plt.title('Benchmark query timings')") + print("plt.legend()") + print("plt.show()") def gen_html_json(options, arguments): tuples = read_stats_file(options, arguments[1]) - print('{') + print("{") print('"system: GreenPlum(x2),') - print(('"version": "%s",' % '4.3.9.1')) + print(('"version": "%s",' % "4.3.9.1")) print('"data_size": 10000000,') print('"time": "",') print('"comments": "",') print('"result":') - print('[') + print("[") for s in tuples: print(s) - print(']') - print('}') + print("]") + print("}") def main(): - ( options, arguments ) = parse_args() + (options, arguments) = parse_args() if len(arguments) > 2: gen_pyplot_code(options, arguments) else: gen_html_json(options, arguments) -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/cmake/strip.sh b/cmake/strip.sh deleted file mode 100755 index de596887159..00000000000 --- a/cmake/strip.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash - -BINARY_PATH=$1 -BINARY_NAME=$(basename $BINARY_PATH) -DESTINATION_STRIPPED_DIR=$2 -OBJCOPY_PATH=${3:objcopy} -READELF_PATH=${4:readelf} - -BUILD_ID=$($READELF_PATH -n $1 | sed -n '/Build ID/ { s/.*: //p; q; }') -BUILD_ID_PREFIX=${BUILD_ID:0:2} -BUILD_ID_SUFFIX=${BUILD_ID:2} -TEMP_BINARY_PATH="${BINARY_PATH}_temp" - -DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id" -DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin" - -mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX" -mkdir -p "$DESTINATION_STRIP_BINARY_DIR" - -$OBJCOPY_PATH --only-keep-debug "$BINARY_PATH" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" - -touch "$TEMP_BINARY_PATH" -$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$BINARY_PATH" "$TEMP_BINARY_PATH" -$OBJCOPY_PATH --strip-all "$TEMP_BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" -rm -f "$TEMP_BINARY_PATH" diff --git a/cmake/strip_binary.cmake b/cmake/strip_binary.cmake index e430807772d..1f24790a159 100644 --- a/cmake/strip_binary.cmake +++ b/cmake/strip_binary.cmake @@ -11,16 +11,43 @@ macro(clickhouse_strip_binary) message(FATAL_ERROR "A binary path name must be provided for 
stripping binary") endif() - if (NOT DEFINED STRIP_DESTINATION_DIR) message(FATAL_ERROR "Destination directory for stripped binary must be provided") endif() add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD - COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH} - COMMENT "Stripping clickhouse binary" VERBATIM + COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin" + COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin" + COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" + COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" + COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMENT "Stripping clickhouse binary" VERBATIM ) install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) + install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}/${STRIP_TARGET}.debug COMPONENT clickhouse) +endmacro() + + +macro(clickhouse_make_empty_debug_info_for_nfpm) + set(oneValueArgs TARGET DESTINATION_DIR) + cmake_parse_arguments(EMPTY_DEBUG "" "${oneValueArgs}" "" ${ARGN}) + + if (NOT DEFINED EMPTY_DEBUG_TARGET) + message(FATAL_ERROR "A target name must be provided for stripping binary") + endif() + + if (NOT DEFINED EMPTY_DEBUG_DESTINATION_DIR) + message(FATAL_ERROR "Destination directory for empty debug must be provided") + endif() + + add_custom_command(TARGET ${EMPTY_DEBUG_TARGET} POST_BUILD + COMMAND mkdir -p "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug" + COMMAND touch "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" + COMMENT "Addiding empty debug info for NFPM" VERBATIM + ) + + install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}" COMPONENT clickhouse) endmacro() diff --git a/cmake/tools.cmake b/cmake/tools.cmake index d6fddd0509e..d571a46ad26 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -170,32 +170,32 @@ else () message (FATAL_ERROR "Cannot find objcopy.") endif () -# Readelf (FIXME copypaste) +# Strip (FIXME copypaste) if (COMPILER_GCC) - find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf") + find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-13" "llvm-strip-12" "llvm-strip-11" "strip") else () - find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf") + find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip") endif () -if (NOT READELF_PATH AND OS_DARWIN) +if (NOT STRIP_PATH AND OS_DARWIN) find_program (BREW_PATH NAMES "brew") if (BREW_PATH) execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX) if (LLVM_PREFIX) - find_program (READELF_PATH NAMES 
"llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH) + find_program (STRIP_PATH NAMES "llvm-strip" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH) endif () - if (NOT READELF_PATH) + if (NOT STRIP_PATH) execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX) if (BINUTILS_PREFIX) - find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH) + find_program (STRIP_PATH NAMES "strip" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH) endif () endif () endif () endif () -if (READELF_PATH) - message (STATUS "Using readelf: ${READELF_PATH}") +if (STRIP_PATH) + message (STATUS "Using strip: ${STRIP_PATH}") else () - message (FATAL_ERROR "Cannot find readelf.") + message (FATAL_ERROR "Cannot find strip.") endif () diff --git a/cmake/version.cmake b/cmake/version.cmake index 963f291c0f3..acaa772ff2f 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -18,6 +18,6 @@ set (VERSION_STRING_SHORT "${VERSION_MAJOR}.${VERSION_MINOR}") math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000") -if(YANDEX_OFFICIAL_BUILD) +if(CLICKHOUSE_OFFICIAL_BUILD) set(VERSION_OFFICIAL " (official build)") endif() diff --git a/contrib/avro-cmake/CMakeLists.txt b/contrib/avro-cmake/CMakeLists.txt index d91ce40dd54..c5bda41782d 100644 --- a/contrib/avro-cmake/CMakeLists.txt +++ b/contrib/avro-cmake/CMakeLists.txt @@ -69,9 +69,10 @@ endif () target_compile_options(_avrocpp PRIVATE ${SUPPRESS_WARNINGS}) # create a symlink to include headers with +set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include") ADD_CUSTOM_TARGET(avro_symlink_headers ALL - COMMAND ${CMAKE_COMMAND} -E make_directory "${AVROCPP_ROOT_DIR}/include" - COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVROCPP_ROOT_DIR}/include/avro" + COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}" + COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro" ) add_dependencies(_avrocpp avro_symlink_headers) -target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVROCPP_ROOT_DIR}/include") +target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}") diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 0215c68e683..3d66bc97971 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -27,7 +27,11 @@ target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRAR # asio -target_compile_definitions (_boost_headers_only INTERFACE BOOST_ASIO_STANDALONE=1) +target_compile_definitions (_boost_headers_only INTERFACE + BOOST_ASIO_STANDALONE=1 + # Avoid using of deprecated in c++ > 17 std::result_of + BOOST_ASIO_HAS_STD_INVOKE_RESULT=1 +) # iostreams diff --git a/contrib/hyperscan b/contrib/hyperscan index e9f08df0213..5edc68c5ac6 160000 --- a/contrib/hyperscan +++ b/contrib/hyperscan @@ -1 +1 @@ -Subproject commit e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa +Subproject commit 5edc68c5ac68d2d4f876159e9ee84def6d3dc87c diff --git a/contrib/libcxx b/contrib/libcxx index 61e60294b1d..172b2ae074f 160000 --- a/contrib/libcxx +++ b/contrib/libcxx @@ -1 +1 @@ -Subproject commit 61e60294b1de01483caa9f5d00f437c99b674de6 +Subproject commit 172b2ae074f6755145b91c53a95c8540c1468239 diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index 332fb0411cd..dc9df48b2c1 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ 
b/contrib/libcxx-cmake/CMakeLists.txt @@ -18,12 +18,14 @@ set(SRCS "${LIBCXX_SOURCE_DIR}/src/filesystem/directory_iterator.cpp" "${LIBCXX_SOURCE_DIR}/src/filesystem/int128_builtins.cpp" "${LIBCXX_SOURCE_DIR}/src/filesystem/operations.cpp" +"${LIBCXX_SOURCE_DIR}/src/format.cpp" "${LIBCXX_SOURCE_DIR}/src/functional.cpp" "${LIBCXX_SOURCE_DIR}/src/future.cpp" "${LIBCXX_SOURCE_DIR}/src/hash.cpp" "${LIBCXX_SOURCE_DIR}/src/ios.cpp" "${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp" "${LIBCXX_SOURCE_DIR}/src/iostream.cpp" +"${LIBCXX_SOURCE_DIR}/src/legacy_pointer_safety.cpp" "${LIBCXX_SOURCE_DIR}/src/locale.cpp" "${LIBCXX_SOURCE_DIR}/src/memory.cpp" "${LIBCXX_SOURCE_DIR}/src/mutex.cpp" @@ -33,6 +35,9 @@ set(SRCS "${LIBCXX_SOURCE_DIR}/src/random.cpp" "${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp" "${LIBCXX_SOURCE_DIR}/src/regex.cpp" +"${LIBCXX_SOURCE_DIR}/src/ryu/d2fixed.cpp" +"${LIBCXX_SOURCE_DIR}/src/ryu/d2s.cpp" +"${LIBCXX_SOURCE_DIR}/src/ryu/f2s.cpp" "${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp" "${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp" "${LIBCXX_SOURCE_DIR}/src/string.cpp" @@ -49,7 +54,9 @@ set(SRCS add_library(cxx ${SRCS}) set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake") -target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>) +target_include_directories(cxx SYSTEM BEFORE PUBLIC + $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include> + $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}>/src) target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) # Enable capturing stack traces for all exceptions. diff --git a/contrib/libcxxabi b/contrib/libcxxabi index df8f1e727db..6eb7cc7a7bd 160000 --- a/contrib/libcxxabi +++ b/contrib/libcxxabi @@ -1 +1 @@ -Subproject commit df8f1e727dbc9e2bedf2282096fa189dc3fe0076 +Subproject commit 6eb7cc7a7bdd779e6734d1b9fb451df2274462d7 diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt index 425111d9b26..bf1ede8a60e 100644 --- a/contrib/libcxxabi-cmake/CMakeLists.txt +++ b/contrib/libcxxabi-cmake/CMakeLists.txt @@ -1,24 +1,24 @@ set(LIBCXXABI_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libcxxabi") set(SRCS -"${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp" "${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp" "${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp" "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp" "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp" "${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp" "${LIBCXXABI_SOURCE_DIR}/src/stdlib_new_delete.cpp" +"${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp" ) add_library(cxxabi ${SRCS}) @@ -30,6 +30,7 @@ target_compile_options(cxxabi PRIVATE -w) target_include_directories(cxxabi SYSTEM BEFORE PUBLIC $ PRIVATE $ + PRIVATE $ ) target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY) target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast. diff --git a/contrib/libxml2 b/contrib/libxml2 index 18890f471c4..a075d256fd9 160000 --- a/contrib/libxml2 +++ b/contrib/libxml2 @@ -1 +1 @@ -Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf +Subproject commit a075d256fd9ff15590b86d981b75a50ead124fca diff --git a/contrib/llvm-cmake/CMakeLists.txt b/contrib/llvm-cmake/CMakeLists.txt index 6ff07f0e016..87c8a65510f 100644 --- a/contrib/llvm-cmake/CMakeLists.txt +++ b/contrib/llvm-cmake/CMakeLists.txt @@ -1,12 +1,9 @@ -# During cross-compilation in our CI we have to use llvm-tblgen and other building tools -# tools to be build for host architecture and everything else for target architecture (e.g. AArch64) -# Possible workaround is to use llvm-tblgen from some package... -# But lets just enable LLVM for native builds -if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined") - set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) +if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") + set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) else() - set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) + set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) endif() + option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) if (NOT ENABLE_EMBEDDED_COMPILER) diff --git a/contrib/replxx b/contrib/replxx index 9460e5e0fc1..6f0b6f151ae 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d +Subproject commit 6f0b6f151ae2a044625ae93acd19ca365fcea64d diff --git a/docker/docs/check/Dockerfile b/docker/docs/check/Dockerfile index 174be123eed..4eb03a91e7a 100644 --- a/docker/docs/check/Dockerfile +++ b/docker/docs/check/Dockerfile @@ -1,4 +1,3 @@ -# rebuild in #33610 # docker build -t clickhouse/docs-check . 
ARG FROM_TAG=latest FROM clickhouse/docs-builder:$FROM_TAG diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile new file mode 100644 index 00000000000..207dddce1bb --- /dev/null +++ b/docker/keeper/Dockerfile @@ -0,0 +1,72 @@ +FROM ubuntu:20.04 AS glibc-donor + +ARG TARGETARCH +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && ln -s "${rarch}-linux-gnu" /lib/linux-gnu + + +FROM alpine + +ENV LANG=en_US.UTF-8 \ + LANGUAGE=en_US:en \ + LC_ALL=en_US.UTF-8 \ + TZ=UTC \ + CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml + +COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/ +COPY --from=glibc-donor /etc/nsswitch.conf /etc/ +COPY entrypoint.sh /entrypoint.sh +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \ + arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \ + esac + +ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" +ARG VERSION="22.4.1.917" +ARG PACKAGES="clickhouse-keeper" + +# user/group precreated explicitly with fixed uid/gid on purpose. +# It is especially important for rootless containers: in that case entrypoint +# can't do chown, and owners of mounted volumes should be configured externally. +# We do that in advance at the beginning of the Dockerfile, before any packages are +# installed, to prevent those uid / gid from being picked up by some unrelated software. +# The same uid / gid (101) is used both for alpine and ubuntu.
+ + +ARG TARGETARCH +RUN arch=${TARGETARCH:-amd64} \ + && for package in ${PACKAGES}; do \ + { \ + { echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \ + && wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \ + && tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \ + } || \ + { echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \ + && wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \ + && tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \ + } ; \ + } || exit 1 \ + ; done \ + && rm /tmp/*.tgz /install -r \ + && addgroup -S -g 101 clickhouse \ + && adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse keeper" -u 101 clickhouse \ + && mkdir -p /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper \ + && chown clickhouse:clickhouse /var/lib/clickhouse \ + && chown root:clickhouse /var/log/clickhouse-keeper \ + && chmod +x /entrypoint.sh \ + && apk add --no-cache su-exec bash tzdata \ + && cp /usr/share/zoneinfo/UTC /etc/localtime \ + && echo "UTC" > /etc/timezone \ + && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper + + +EXPOSE 2181 10181 44444 + +VOLUME /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/keeper/Dockerfile.alpine b/docker/keeper/Dockerfile.alpine new file mode 120000 index 00000000000..1d1fe94df49 --- /dev/null +++ b/docker/keeper/Dockerfile.alpine @@ -0,0 +1 @@ +Dockerfile \ No newline at end of file diff --git a/docker/keeper/entrypoint.sh b/docker/keeper/entrypoint.sh new file mode 100644 index 00000000000..3aacf655c28 --- /dev/null +++ b/docker/keeper/entrypoint.sh @@ -0,0 +1,93 @@ +#!/bin/bash + +set +x +set -eo pipefail +shopt -s nullglob + +DO_CHOWN=1 +if [ "${CLICKHOUSE_DO_NOT_CHOWN:-0}" = "1" ]; then + DO_CHOWN=0 +fi + +CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}" +CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}" + +# support --user +if [ "$(id -u)" = "0" ]; then + USER=$CLICKHOUSE_UID + GROUP=$CLICKHOUSE_GID + if command -v gosu &> /dev/null; then + gosu="gosu $USER:$GROUP" + elif command -v su-exec &> /dev/null; then + gosu="su-exec $USER:$GROUP" + else + echo "No gosu/su-exec detected!" + exit 1 + fi +else + USER="$(id -u)" + GROUP="$(id -g)" + gosu="" + DO_CHOWN=0 +fi + +KEEPER_CONFIG="${KEEPER_CONFIG:-/etc/clickhouse-keeper/config.yaml}" + +if [ -f "$KEEPER_CONFIG" ] && ! $gosu test -f "$KEEPER_CONFIG" -a -r "$KEEPER_CONFIG"; then + echo "Configuration file '$KEEPER_CONFIG' isn't readable by user with id '$USER'" + exit 1 +fi + +DATA_DIR="${CLICKHOUSE_DATA_DIR:-/var/lib/clickhouse}" +LOG_DIR="${LOG_DIR:-/var/log/clickhouse-keeper}" +LOG_PATH="${LOG_DIR}/clickhouse-keeper.log" +ERROR_LOG_PATH="${LOG_DIR}/clickhouse-keeper.err.log" +COORDINATION_LOG_DIR="${DATA_DIR}/coordination/log" +COORDINATION_SNAPSHOT_DIR="${DATA_DIR}/coordination/snapshots" +CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0} + +for dir in "$DATA_DIR" \ + "$LOG_DIR" \ + "$TMP_DIR" \ + "$COORDINATION_LOG_DIR" \ + "$COORDINATION_SNAPSHOT_DIR" +do + # check if variable not empty + [ -z "$dir" ] && continue + # ensure directories exist + if ! 
mkdir -p "$dir"; then + echo "Couldn't create necessary directory: $dir" + exit 1 + fi + + if [ "$DO_CHOWN" = "1" ]; then + # ensure proper directories permissions + # but skip it for if directory already has proper premissions, cause recursive chown may be slow + if [ "$(stat -c %u "$dir")" != "$USER" ] || [ "$(stat -c %g "$dir")" != "$GROUP" ]; then + chown -R "$USER:$GROUP" "$dir" + fi + elif ! $gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then + echo "Necessary directory '$dir' isn't accessible by user with id '$USER'" + exit 1 + fi +done + +# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments +if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then + # Watchdog is launched by default, but does not send SIGINT to the main process, + # so the container can't be finished by ctrl+c + export CLICKHOUSE_WATCHDOG_ENABLE + + cd /var/lib/clickhouse + + # There is a config file. It is already tested with gosu (if it is readably by keeper user) + if [ -f "$KEEPER_CONFIG" ]; then + exec $gosu /usr/bin/clickhouse-keeper --config-file="$KEEPER_CONFIG" --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@" + fi + + # There is no config file. Will use embedded one + exec $gosu /usr/bin/clickhouse-keeper --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@" +fi + +# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image +exec "$@" diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index e3e2e689b17..a57a734e3df 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -95,6 +95,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ && apt-get install gcc-11 g++-11 --yes \ && apt-get clean +# Architecture of the image when BuildKit/buildx is used +ARG TARGETARCH +ARG NFPM_VERSION=2.15.0 + +RUN arch=${TARGETARCH:-amd64} \ + && curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \ + && dpkg -i /tmp/nfpm.deb \ + && rm /tmp/nfpm.deb COPY build.sh / -CMD ["bash", "-c", "/build.sh 2>&1 | ts"] +CMD ["bash", "-c", "/build.sh 2>&1"] diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 2f18b07ffe1..31416e1a0ee 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -1,7 +1,13 @@ #!/usr/bin/env bash +exec &> >(ts) set -x -e +cache_status () { + ccache --show-config ||: + ccache --show-stats ||: +} + mkdir -p build/cmake/toolchain/darwin-x86_64 tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1 ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64 @@ -19,15 +25,23 @@ read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" env cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. -ccache --show-config ||: -ccache --show-stats ||: +cache_status +# clear cache stats ccache --zero-stats ||: -# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. +# No quotes because I want it to expand to nothing if empty. +# shellcheck disable=SC2086 ninja $NINJA_FLAGS clickhouse-bundle -ccache --show-config ||: -ccache --show-stats ||: +cache_status + +if [ -n "$MAKE_DEB" ]; then + rm -rf /build/packages/root + # No quotes because I want it to expand to nothing if empty. 
+ # shellcheck disable=SC2086 + DESTDIR=/build/packages/root ninja $NINJA_FLAGS install + bash -x /build/packages/build +fi mv ./programs/clickhouse* /output mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds @@ -84,8 +98,7 @@ fi # ../docker/packager/other/fuzzer.sh # fi -ccache --show-config ||: -ccache --show-stats ||: +cache_status if [ "${CCACHE_DEBUG:-}" == "1" ] then diff --git a/docker/packager/packager b/docker/packager/packager index 05b2e02df96..f82d402d613 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -#-*- coding: utf-8 -*- +# -*- coding: utf-8 -*- import subprocess import os import argparse @@ -8,36 +8,39 @@ import sys SCRIPT_PATH = os.path.realpath(__file__) -IMAGE_MAP = { - "deb": "clickhouse/deb-builder", - "binary": "clickhouse/binary-builder", -} def check_image_exists_locally(image_name): try: - output = subprocess.check_output("docker images -q {} 2> /dev/null".format(image_name), shell=True) + output = subprocess.check_output( + f"docker images -q {image_name} 2> /dev/null", shell=True + ) return output != "" - except subprocess.CalledProcessError as ex: + except subprocess.CalledProcessError: return False + def pull_image(image_name): try: - subprocess.check_call("docker pull {}".format(image_name), shell=True) + subprocess.check_call(f"docker pull {image_name}", shell=True) return True - except subprocess.CalledProcessError as ex: - logging.info("Cannot pull image {}".format(image_name)) + except subprocess.CalledProcessError: + logging.info(f"Cannot pull image {image_name}") return False + def build_image(image_name, filepath): context = os.path.dirname(filepath) - build_cmd = "docker build --network=host -t {} -f {} {}".format(image_name, filepath, context) - logging.info("Will build image with cmd: '{}'".format(build_cmd)) + build_cmd = f"docker build --network=host -t {image_name} -f {filepath} {context}" + logging.info("Will build image with cmd: '%s'", build_cmd) subprocess.check_call( build_cmd, shell=True, ) -def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir, docker_image_version): + +def run_docker_image_with_env( + image_name, output, env_variables, ch_root, ccache_dir, docker_image_version +): env_part = " -e ".join(env_variables) if env_part: env_part = " -e " + env_part @@ -47,28 +50,52 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache else: interactive = "" - cmd = "docker run --network=host --rm --volume={output_path}:/output --volume={ch_root}:/build --volume={ccache_dir}:/ccache {env} {interactive} {img_name}".format( - output_path=output, - ch_root=ch_root, - ccache_dir=ccache_dir, - env=env_part, - img_name=image_name + ":" + docker_image_version, - interactive=interactive + cmd = ( + f"docker run --network=host --rm --volume={output}:/output " + f"--volume={ch_root}:/build --volume={ccache_dir}:/ccache {env_part} " + f"{interactive} {image_name}:{docker_image_version}" ) - logging.info("Will build ClickHouse pkg with cmd: '{}'".format(cmd)) + logging.info("Will build ClickHouse pkg with cmd: '%s'", cmd) subprocess.check_call(cmd, shell=True) -def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries): + +def is_release_build(build_type, package_type, sanitizer, split_binary): + return ( + build_type == "" + and package_type == "deb" + and sanitizer
== "" + and not split_binary + ) + + +def parse_env_variables( + build_type, + compiler, + sanitizer, + package_type, + image_type, + cache, + distcc_hosts, + split_binary, + clang_tidy, + version, + author, + official, + additional_pkgs, + with_coverage, + with_binaries, +): DARWIN_SUFFIX = "-darwin" DARWIN_ARM_SUFFIX = "-darwin-aarch64" ARM_SUFFIX = "-aarch64" FREEBSD_SUFFIX = "-freebsd" - PPC_SUFFIX = '-ppc64le' + PPC_SUFFIX = "-ppc64le" result = [] - cmake_flags = ['$CMAKE_FLAGS'] + result.append("OUTPUT_DIR=/output") + cmake_flags = ["$CMAKE_FLAGS"] is_cross_darwin = compiler.endswith(DARWIN_SUFFIX) is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX) @@ -77,46 +104,73 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) if is_cross_darwin: - cc = compiler[:-len(DARWIN_SUFFIX)] + cc = compiler[: -len(DARWIN_SUFFIX)] cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/x86_64-apple-darwin-ar") - cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/x86_64-apple-darwin-install_name_tool") - cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib") + cmake_flags.append( + "-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/" + "x86_64-apple-darwin-install_name_tool" + ) + cmake_flags.append( + "-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib" + ) cmake_flags.append("-DLINKER_NAME=/cctools/bin/x86_64-apple-darwin-ld") - cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake") + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake" + ) elif is_cross_darwin_arm: - cc = compiler[:-len(DARWIN_ARM_SUFFIX)] + cc = compiler[: -len(DARWIN_ARM_SUFFIX)] cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar") - cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/aarch64-apple-darwin-install_name_tool") - cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib") + cmake_flags.append( + "-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/" + "aarch64-apple-darwin-install_name_tool" + ) + cmake_flags.append( + "-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib" + ) cmake_flags.append("-DLINKER_NAME=/cctools/bin/aarch64-apple-darwin-ld") - cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake") + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake" + ) elif is_cross_arm: - cc = compiler[:-len(ARM_SUFFIX)] - cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake") - result.append("DEB_ARCH_FLAG=-aarm64") + cc = compiler[: -len(ARM_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake" + ) + result.append("DEB_ARCH=arm64") elif is_cross_freebsd: - cc = compiler[:-len(FREEBSD_SUFFIX)] - cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake") + cc = compiler[: -len(FREEBSD_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake" + ) elif is_cross_ppc: - cc = compiler[:-len(PPC_SUFFIX)] - cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake") + cc = compiler[: -len(PPC_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake" + ) else: cc = compiler - result.append("DEB_ARCH_FLAG=-aamd64") + result.append("DEB_ARCH=amd64") - cxx = cc.replace('gcc', 
'g++').replace('clang', 'clang++') + cxx = cc.replace("gcc", "g++").replace("clang", "clang++") if image_type == "deb": - result.append("DEB_CC={}".format(cc)) - result.append("DEB_CXX={}".format(cxx)) - # For building fuzzers - result.append("CC={}".format(cc)) - result.append("CXX={}".format(cxx)) - elif image_type == "binary": - result.append("CC={}".format(cc)) - result.append("CXX={}".format(cxx)) - cmake_flags.append('-DCMAKE_C_COMPILER=`which {}`'.format(cc)) - cmake_flags.append('-DCMAKE_CXX_COMPILER=`which {}`'.format(cxx)) + result.append("MAKE_DEB=true") + cmake_flags.append("-DENABLE_TESTS=0") + cmake_flags.append("-DENABLE_UTILS=0") + cmake_flags.append("-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON") + cmake_flags.append("-DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON") + cmake_flags.append("-DCMAKE_AUTOGEN_VERBOSE=ON") + cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr") + cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc") + cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var") + cmake_flags.append("-DBUILD_STANDALONE_KEEPER=ON") + if is_release_build(build_type, package_type, sanitizer, split_binary): + cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON") + + result.append(f"CC={cc}") + result.append(f"CXX={cxx}") + cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}") + cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}") # Create combined output archive for split build and for performance tests. if package_type == "performance": @@ -126,12 +180,14 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ result.append("COMBINED_OUTPUT=shared_build") if sanitizer: - result.append("SANITIZER={}".format(sanitizer)) + result.append(f"SANITIZER={sanitizer}") if build_type: - result.append("BUILD_TYPE={}".format(build_type)) + result.append(f"BUILD_TYPE={build_type.capitalize()}") + else: + result.append("BUILD_TYPE=None") - if cache == 'distcc': - result.append("CCACHE_PREFIX={}".format(cache)) + if cache == "distcc": + result.append(f"CCACHE_PREFIX={cache}") if cache: result.append("CCACHE_DIR=/ccache") @@ -142,109 +198,188 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ # result.append("CCACHE_UMASK=777") if distcc_hosts: - hosts_with_params = ["{}/24,lzo".format(host) for host in distcc_hosts] + ["localhost/`nproc`"] - result.append('DISTCC_HOSTS="{}"'.format(" ".join(hosts_with_params))) + hosts_with_params = [f"{host}/24,lzo" for host in distcc_hosts] + [ + "localhost/`nproc`" + ] + result.append('DISTCC_HOSTS="' + " ".join(hosts_with_params) + '"') elif cache == "distcc": - result.append('DISTCC_HOSTS="{}"'.format("localhost/`nproc`")) + result.append('DISTCC_HOSTS="localhost/`nproc`"') - if alien_pkgs: - result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'") + if additional_pkgs: + result.append("MAKE_APK=true") + result.append("MAKE_RPM=true") + result.append("MAKE_TGZ=true") if with_binaries == "programs": - result.append('BINARY_OUTPUT=programs') + result.append("BINARY_OUTPUT=programs") elif with_binaries == "tests": - result.append('ENABLE_TESTS=1') - result.append('BINARY_OUTPUT=tests') - cmake_flags.append('-DENABLE_TESTS=1') + result.append("ENABLE_TESTS=1") + result.append("BINARY_OUTPUT=tests") + cmake_flags.append("-DENABLE_TESTS=1") if split_binary: - cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1') + cmake_flags.append( + "-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 " + "-DCLICKHOUSE_SPLIT_BINARY=1" + ) # We can't always 
build utils because it requires too much space, but # we have to build them at least in some way in CI. The split build is # probably the least heavy disk-wise. - cmake_flags.append('-DENABLE_UTILS=1') + cmake_flags.append("-DENABLE_UTILS=1") if clang_tidy: - cmake_flags.append('-DENABLE_CLANG_TIDY=1') - cmake_flags.append('-DENABLE_UTILS=1') - cmake_flags.append('-DENABLE_TESTS=1') - cmake_flags.append('-DENABLE_EXAMPLES=1') + cmake_flags.append("-DENABLE_CLANG_TIDY=1") + cmake_flags.append("-DENABLE_UTILS=1") + cmake_flags.append("-DENABLE_TESTS=1") + cmake_flags.append("-DENABLE_EXAMPLES=1") # Don't stop on first error to find more clang-tidy errors in one run. - result.append('NINJA_FLAGS=-k0') + result.append("NINJA_FLAGS=-k0") if with_coverage: - cmake_flags.append('-DWITH_COVERAGE=1') + cmake_flags.append("-DWITH_COVERAGE=1") if version: - result.append("VERSION_STRING='{}'".format(version)) + result.append(f"VERSION_STRING='{version}'") if author: - result.append("AUTHOR='{}'".format(author)) + result.append(f"AUTHOR='{author}'") if official: - cmake_flags.append('-DYANDEX_OFFICIAL_BUILD=1') + cmake_flags.append("-DCLICKHOUSE_OFFICIAL_BUILD=1") - result.append('CMAKE_FLAGS="' + ' '.join(cmake_flags) + '"') + result.append('CMAKE_FLAGS="' + " ".join(cmake_flags) + '"') return result + if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="ClickHouse building script using prebuilt Docker image") - # 'performance' creates a combined .tgz with server and configs to be used for performance test. - parser.add_argument("--package-type", choices=['deb', 'binary', 'performance'], required=True) - parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)) + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="ClickHouse building script using prebuilt Docker image", + ) + # 'performance' creates a combined .tgz with server + # and configs to be used for performance test. 
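Before the argument definitions that follow, a note for readers tracing the refactor: the list built by `parse_env_variables` above is eventually joined into `docker run -e ...` flags by `run_docker_image_with_env`. A minimal, self-contained sketch of that composition (the variable values here are invented for illustration):

```python
# Hypothetical env list; parse_env_variables() returns strings of this shape.
env_variables = ["CC=clang-13", "CXX=clang++-13", 'CMAKE_FLAGS="-DENABLE_TESTS=0"']

env_part = " -e ".join(env_variables)
if env_part:
    env_part = " -e " + env_part  # join() leaves the first entry unprefixed

cmd = f"docker run --network=host --rm{env_part} clickhouse/binary-builder:latest"
print(cmd)
# docker run --network=host --rm -e CC=clang-13 -e CXX=clang++-13
#   -e CMAKE_FLAGS="-DENABLE_TESTS=0" clickhouse/binary-builder:latest
```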
+ parser.add_argument( + "--package-type", + choices=("deb", "binary", "performance"), + required=True, + help="a build type", + ) + parser.add_argument( + "--clickhouse-repo-path", + default=os.path.join( + os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir + ), + help="ClickHouse git repository", + ) parser.add_argument("--output-dir", required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") - parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64", - "clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64", - "clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64", "clang-13-ppc64le", - "clang-11-freebsd", "clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13") - parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") + parser.add_argument( + "--compiler", + choices=( + "clang-11", + "clang-11-darwin", + "clang-11-darwin-aarch64", + "clang-11-aarch64", + "clang-12", + "clang-12-darwin", + "clang-12-darwin-aarch64", + "clang-12-aarch64", + "clang-13", + "clang-13-darwin", + "clang-13-darwin-aarch64", + "clang-13-aarch64", + "clang-13-ppc64le", + "clang-11-freebsd", + "clang-12-freebsd", + "clang-13-freebsd", + "gcc-11", + ), + default="clang-13", + help="a compiler to use", + ) + parser.add_argument( + "--sanitizer", + choices=("address", "thread", "memory", "undefined", ""), + default="", + ) parser.add_argument("--split-binary", action="store_true") parser.add_argument("--clang-tidy", action="store_true") - parser.add_argument("--cache", choices=("", "ccache", "distcc"), default="") - parser.add_argument("--ccache_dir", default= os.getenv("HOME", "") + '/.ccache') + parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="") + parser.add_argument( + "--ccache_dir", + default=os.getenv("HOME", "") + "/.ccache", + help="a directory with ccache", + ) parser.add_argument("--distcc-hosts", nargs="+") parser.add_argument("--force-build-image", action="store_true") parser.add_argument("--version") - parser.add_argument("--author", default="clickhouse") + parser.add_argument("--author", default="clickhouse", help="a package author") parser.add_argument("--official", action="store_true") - parser.add_argument("--alien-pkgs", nargs='+', default=[]) + parser.add_argument("--additional-pkgs", action="store_true") parser.add_argument("--with-coverage", action="store_true") - parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="") - parser.add_argument("--docker-image-version", default="latest") + parser.add_argument( + "--with-binaries", choices=("programs", "tests", ""), default="" + ) + parser.add_argument( + "--docker-image-version", default="latest", help="docker image tag to use" + ) args = parser.parse_args() if not os.path.isabs(args.output_dir): args.output_dir = os.path.abspath(os.path.join(os.getcwd(), args.output_dir)) - image_type = 'binary' if args.package_type == 'performance' else args.package_type - image_name = IMAGE_MAP[image_type] + image_type = "binary" if args.package_type == "performance" else args.package_type + image_name = "clickhouse/binary-builder" if not os.path.isabs(args.clickhouse_repo_path): ch_root = os.path.abspath(os.path.join(os.getcwd(), args.clickhouse_repo_path)) else: ch_root = args.clickhouse_repo_path - if args.alien_pkgs and not image_type == "deb": - raise Exception("Can add alien packages only in deb 
build") + if args.additional_pkgs and image_type != "deb": + raise Exception("Can build additional packages only in deb build") - if args.with_binaries != "" and not image_type == "deb": + if args.with_binaries != "" and image_type != "deb": raise Exception("Can add additional binaries only in deb build") if args.with_binaries != "" and image_type == "deb": - logging.info("Should place {} to output".format(args.with_binaries)) + logging.info("Should place %s to output", args.with_binaries) dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile") image_with_version = image_name + ":" + args.docker_image_version - if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image: + if ( + image_type != "freebsd" + and not check_image_exists_locally(image_name) + or args.force_build_image + ): if not pull_image(image_with_version) or args.force_build_image: build_image(image_with_version, dockerfile) env_prepared = parse_env_variables( - args.build_type, args.compiler, args.sanitizer, args.package_type, image_type, - args.cache, args.distcc_hosts, args.split_binary, args.clang_tidy, - args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries) + args.build_type, + args.compiler, + args.sanitizer, + args.package_type, + image_type, + args.cache, + args.distcc_hosts, + args.split_binary, + args.clang_tidy, + args.version, + args.author, + args.official, + args.additional_pkgs, + args.with_coverage, + args.with_binaries, + ) - run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir, args.docker_image_version) - logging.info("Output placed into {}".format(args.output_dir)) + run_docker_image_with_env( + image_name, + args.output_dir, + env_prepared, + ch_root, + args.ccache_dir, + args.docker_image_version, + ) + logging.info("Output placed into %s", args.output_dir) diff --git a/docker/server/.gitignore b/docker/server/.gitignore deleted file mode 100644 index 692758d55aa..00000000000 --- a/docker/server/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -alpine-root/* -tgz-packages/* diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile deleted file mode 100644 index 5b7990ab030..00000000000 --- a/docker/server/Dockerfile +++ /dev/null @@ -1,122 +0,0 @@ -FROM ubuntu:20.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -ARG repository="deb https://packages.clickhouse.com/deb stable main" -ARG version=22.1.1.* - -# set non-empty deb_location_url url to create a docker image -# from debs created by CI build, for example: -# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852 -ARG deb_location_url="" - -# set non-empty single_binary_location_url to create docker image -# from a single binary url (useful for non-standard builds - with sanitizers, for arm64). -# for example (run on aarch64 server): -# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.com/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm -# note: clickhouse-odbc-bridge is not supported there. 
-ARG single_binary_location_url="" - -# see https://github.com/moby/moby/issues/4032#issuecomment-192327844 -ARG DEBIAN_FRONTEND=noninteractive - -# user/group precreated explicitly with fixed uid/gid on purpose. -# It is especially important for rootless containers: in that case entrypoint -# can't do chown and owners of mounted volumes should be configured externally. -# We do that in advance at the begining of Dockerfile before any packages will be -# installed to prevent picking those uid / gid by some unrelated software. -# The same uid / gid (101) is used both for alpine and ubuntu. - -# To drop privileges, we need 'su' command, that simply changes uid and gid. -# In fact, the 'su' command from Linux is not so simple, due to inherent vulnerability in Linux: -# https://ruderich.org/simon/notes/su-sudo-from-root-tty-hijacking -# It has to mitigate this drawback of Linux, and to do this, 'su' command is creating it's own pseudo-terminal -# and forwarding commands. Due to some ridiculous curcumstances, it does not work in Docker (or it does) -# and for these reasons people are using alternatives to the 'su' command in Docker, -# that don't mess with the terminal, don't care about closing the opened files, etc... -# but can only be safe to drop privileges inside Docker. -# The question - what implementation of 'su' command to use. -# It should be a simple script doing about just two syscalls. -# Some people tend to use 'gosu' tool that is written in Go. -# It is not used for several reasons: -# 1. Dependency on some foreign code in yet another programming language - does not sound alright. -# 2. Anselmo D. Adams suggested not to use it due to false positive alarms in some undisclosed security scanners. - -COPY su-exec.c /su-exec.c - -RUN groupadd -r clickhouse --gid=101 \ - && useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \ - && apt-get update \ - && apt-get install --yes --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - dirmngr \ - gnupg \ - locales \ - wget \ - tzdata \ - && mkdir -p /etc/apt/sources.list.d \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \ - && echo $repository > /etc/apt/sources.list.d/clickhouse.list \ - && if [ -n "$deb_location_url" ]; then \ - echo "installing from custom url with deb packages: $deb_location_url" \ - rm -rf /tmp/clickhouse_debs \ - && mkdir -p /tmp/clickhouse_debs \ - && wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-common-static_${version}_amd64.deb" -P /tmp/clickhouse_debs \ - && wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-client_${version}_all.deb" -P /tmp/clickhouse_debs \ - && wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-server_${version}_all.deb" -P /tmp/clickhouse_debs \ - && dpkg -i /tmp/clickhouse_debs/*.deb ; \ - elif [ -n "$single_binary_location_url" ]; then \ - echo "installing from single binary url: $single_binary_location_url" \ - && rm -rf /tmp/clickhouse_binary \ - && mkdir -p /tmp/clickhouse_binary \ - && wget --progress=bar:force:noscroll "$single_binary_location_url" -O /tmp/clickhouse_binary/clickhouse \ - && chmod +x /tmp/clickhouse_binary/clickhouse \ - && /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \ - else \ - echo "installing from repository: $repository" \ - && apt-get update \ - && apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \ - && apt-get install 
--allow-unauthenticated --yes --no-install-recommends \ - clickhouse-common-static=$version \ - clickhouse-client=$version \ - clickhouse-server=$version ; \ - fi \ - && apt-get install -y --no-install-recommends tcc libc-dev && \ - tcc /su-exec.c -o /bin/su-exec && \ - chown root:root /bin/su-exec && \ - chmod 0755 /bin/su-exec && \ - rm /su-exec.c && \ - apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \ - && clickhouse-local -q 'SELECT * FROM system.build_options' \ - && rm -rf \ - /var/lib/apt/lists/* \ - /var/cache/debconf \ - /tmp/* \ - && apt-get clean \ - && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \ - && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client - -# we need to allow "others" access to clickhouse folder, because docker container -# can be started with arbitrary uid (openshift usecase) - -RUN locale-gen en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 -ENV TZ UTC - -RUN mkdir /docker-entrypoint-initdb.d - -COPY docker_related_config.xml /etc/clickhouse-server/config.d/ -COPY entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh - -EXPOSE 9000 8123 9009 -VOLUME /var/lib/clickhouse - -ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml - -ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile new file mode 120000 index 00000000000..fd45f0f7c7c --- /dev/null +++ b/docker/server/Dockerfile @@ -0,0 +1 @@ +Dockerfile.ubuntu \ No newline at end of file diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index cd192c0c9da..5aaf5dd5511 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -1,3 +1,14 @@ +FROM ubuntu:20.04 AS glibc-donor +ARG TARGETARCH + +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && ln -s "${rarch}-linux-gnu" /lib/linux-gnu + + FROM alpine ENV LANG=en_US.UTF-8 \ @@ -6,7 +17,24 @@ ENV LANG=en_US.UTF-8 \ TZ=UTC \ CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml -COPY alpine-root/ / +COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/ +COPY --from=glibc-donor /etc/nsswitch.conf /etc/ +COPY docker_related_config.xml /etc/clickhouse-server/config.d/ +COPY entrypoint.sh /entrypoint.sh + +ARG TARGETARCH + +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \ + arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \ + esac + +# lts / testing / prestable / etc +ARG REPO_CHANNEL="stable" +ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" +ARG VERSION="20.9.3.45" +ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. # It is especially important for rootless containers: in that case entrypoint @@ -15,9 +43,23 @@ COPY alpine-root/ / # installed to prevent picking those uid / gid by some unrelated software. # The same uid / gid (101) is used both for alpine and ubuntu. 
-RUN addgroup -S -g 101 clickhouse \ +RUN arch=${TARGETARCH:-amd64} \ + && for package in ${PACKAGES}; do \ + { \ + { echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \ + && wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \ + && tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \ + } || \ + { echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \ + && wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \ + && tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \ + } ; \ + } || exit 1 \ + ; done \ + && rm /tmp/*.tgz /install -r \ + && addgroup -S -g 101 clickhouse \ && adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse server" -u 101 clickhouse \ - && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \ + && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server/config.d /etc/clickhouse-server/users.d /etc/clickhouse-client /docker-entrypoint-initdb.d \ && chown clickhouse:clickhouse /var/lib/clickhouse \ && chown root:clickhouse /var/log/clickhouse-server \ && chmod +x /entrypoint.sh \ diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu new file mode 100644 index 00000000000..cc198772251 --- /dev/null +++ b/docker/server/Dockerfile.ubuntu @@ -0,0 +1,128 @@ +FROM ubuntu:20.04 + +# see https://github.com/moby/moby/issues/4032#issuecomment-192327844 +ARG DEBIAN_FRONTEND=noninteractive + +COPY su-exec.c /su-exec.c + +# ARG for quick switch to a given ubuntu mirror +ARG apt_archive="http://archive.ubuntu.com" +RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list \ + && groupadd -r clickhouse --gid=101 \ + && useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \ + && apt-get update \ + && apt-get install --yes --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + dirmngr \ + gnupg \ + locales \ + wget \ + tzdata \ + && apt-get install -y --no-install-recommends tcc libc-dev && \ + tcc /su-exec.c -o /bin/su-exec && \ + chown root:root /bin/su-exec && \ + chmod 0755 /bin/su-exec && \ + rm /su-exec.c && \ + apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \ + && apt-get clean + +ARG REPO_CHANNEL="stable" +ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" +ARG VERSION=22.1.1.* +ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" + +# set non-empty deb_location_url url to create a docker image +# from debs created by CI build, for example: +# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852 +ARG deb_location_url="" + +# set non-empty single_binary_location_url to create docker image +# from a single binary url (useful for non-standard builds - with sanitizers, for arm64). +# for example (run on aarch64 server): +# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.com/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm +# note: clickhouse-odbc-bridge is not supported there. 
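As an aside on the Alpine RUN loop above: it first tries an architecture-suffixed tarball and falls back to the unsuffixed name, because the two archive layouts differ (note the different --strip-components values). A rough Python sketch of the same fallback, assuming wget is on PATH (the URLs mirror the Dockerfile ARGs; the helper name is invented):

```python
import subprocess

REPOSITORY = "https://packages.clickhouse.com/tgz/stable"
VERSION = "20.9.3.45"

def fetch_package(package: str, arch: str = "amd64") -> str:
    """Try the arch-suffixed tarball first, then fall back to the generic one."""
    for name in (f"{package}-{VERSION}-{arch}.tgz", f"{package}-{VERSION}.tgz"):
        url = f"{REPOSITORY}/{name}"
        if subprocess.call(["wget", "-c", "-q", url, "-O", f"/tmp/{name}"]) == 0:
            return f"/tmp/{name}"
    raise RuntimeError(f"no tarball found for {package}")
```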
+ARG single_binary_location_url="" + +# user/group precreated explicitly with fixed uid/gid on purpose. +# It is especially important for rootless containers: in that case the entrypoint +# can't do chown, and owners of mounted volumes should be configured externally. +# We do that in advance at the beginning of the Dockerfile, before any packages are +# installed, to prevent some unrelated software from picking up those uid / gid values. +# The same uid / gid (101) is used both for alpine and ubuntu. + +# To drop privileges, we need a 'su' command that simply changes uid and gid. +# In fact, the 'su' command from Linux is not so simple, due to an inherent vulnerability in Linux: +# https://ruderich.org/simon/notes/su-sudo-from-root-tty-hijacking +# To mitigate this drawback of Linux, the 'su' command creates its own pseudo-terminal +# and forwards commands through it. Due to some ridiculous circumstances, it does not work in Docker (or it does) +# and for these reasons people use alternatives to the 'su' command in Docker +# that don't mess with the terminal and don't care about closing the opened files, etc., +# but are only safe for dropping privileges inside Docker. +# The question is which implementation of 'su' to use. +# It should be a simple program doing just a couple of syscalls. +# Some people tend to use the 'gosu' tool, which is written in Go. +# It is not used here for several reasons: +# 1. A dependency on foreign code in yet another programming language does not sound alright. +# 2. Anselmo D. Adams suggested not to use it due to false-positive alarms in some undisclosed security scanners. + +ARG TARGETARCH + +RUN arch=${TARGETARCH:-amd64} \ + && if [ -n "${deb_location_url}" ]; then \ + echo "installing from custom url with deb packages: ${deb_location_url}" \ + && rm -rf /tmp/clickhouse_debs \ + && mkdir -p /tmp/clickhouse_debs \ + && for package in ${PACKAGES}; do \ + { wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_${arch}.deb" -P /tmp/clickhouse_debs || \ + wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_all.deb" -P /tmp/clickhouse_debs ; } \ + || exit 1 \ + ; done \ + && dpkg -i /tmp/clickhouse_debs/*.deb ; \ + elif [ -n "${single_binary_location_url}" ]; then \ + echo "installing from single binary url: ${single_binary_location_url}" \ + && rm -rf /tmp/clickhouse_binary \ + && mkdir -p /tmp/clickhouse_binary \ + && wget --progress=bar:force:noscroll "${single_binary_location_url}" -O /tmp/clickhouse_binary/clickhouse \ + && chmod +x /tmp/clickhouse_binary/clickhouse \ + && /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \ + else \ + mkdir -p /etc/apt/sources.list.d \ + && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \ + && echo ${REPOSITORY} > /etc/apt/sources.list.d/clickhouse.list \ + && echo "installing from repository: ${REPOSITORY}" \ + && apt-get update \ + && apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \ + && for package in ${PACKAGES}; do \ + apt-get install --allow-unauthenticated --yes --no-install-recommends "${package}=${VERSION}" || exit 1 \ + ; done \ + ; fi \ + && clickhouse-local -q 'SELECT * FROM system.build_options' \ + && rm -rf \ + /var/lib/apt/lists/* \ + /var/cache/debconf \ + /tmp/* \ + && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \ + && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server
/etc/clickhouse-client + +# we need to allow "others" access to clickhouse folder, because docker container +# can be started with arbitrary uid (openshift usecase) + +RUN locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 +ENV TZ UTC + +RUN mkdir /docker-entrypoint-initdb.d + +COPY docker_related_config.xml /etc/clickhouse-server/config.d/ +COPY entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +EXPOSE 9000 8123 9009 +VOLUME /var/lib/clickhouse + +ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/server/alpine-build.sh b/docker/server/alpine-build.sh deleted file mode 100755 index 1b448c61fbb..00000000000 --- a/docker/server/alpine-build.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash -set -x - -REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc -REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}" -VERSION="${VERSION:-20.9.3.45}" -DOCKER_IMAGE="${DOCKER_IMAGE:-clickhouse/clickhouse-server}" - -# where original files live -DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}" - -# we will create root for our image here -CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root" - -# clean up the root from old runs, it's reconstructed each time -rm -rf "$CONTAINER_ROOT_FOLDER" -mkdir -p "$CONTAINER_ROOT_FOLDER" - -# where to put downloaded tgz -TGZ_PACKAGES_FOLDER="${DOCKER_BUILD_FOLDER}/tgz-packages" -mkdir -p "$TGZ_PACKAGES_FOLDER" - -PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" ) - -# download tars from the repo -for package in "${PACKAGES[@]}" -do - wget -c -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" -done - -# unpack tars -for package in "${PACKAGES[@]}" -do - tar xvzf "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" --strip-components=2 -C "$CONTAINER_ROOT_FOLDER" -done - -# prepare few more folders -mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \ - "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d" \ - "${CONTAINER_ROOT_FOLDER}/var/log/clickhouse-server" \ - "${CONTAINER_ROOT_FOLDER}/var/lib/clickhouse" \ - "${CONTAINER_ROOT_FOLDER}/docker-entrypoint-initdb.d" \ - "${CONTAINER_ROOT_FOLDER}/lib64" - -cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/" -cp "${DOCKER_BUILD_FOLDER}/entrypoint.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh" - -## get glibc components from ubuntu 20.04 and put them to expected place -docker pull ubuntu:20.04 -ubuntu20image=$(docker create --rm ubuntu:20.04) -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libc.so.6 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libdl.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libm.so.6 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_files.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64" 
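Returning to the su-exec rationale spelled out in Dockerfile.ubuntu above: the comments ask for a helper that performs just a couple of syscalls and then executes the target program without touching the terminal. The real helper is the C file su-exec.c compiled with tcc; purely as an illustration of the drop-then-exec sequence, a hypothetical Python equivalent:

```python
import os

def drop_privileges_and_exec(uid: int, gid: int, argv: list) -> None:
    # Order matters: setgid() must run while we are still root,
    # because setuid() drops root irreversibly.
    os.setgid(gid)  # a production helper would also call os.setgroups([])
    os.setuid(uid)
    # Replace the process image; unlike 'su', no pseudo-terminal is created.
    os.execvp(argv[0], argv)

# e.g. drop_privileges_and_exec(101, 101, ["clickhouse-server"])
```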
-docker cp -L "${ubuntu20image}":/etc/nsswitch.conf "${CONTAINER_ROOT_FOLDER}/etc" - -docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull -rm -rf "$CONTAINER_ROOT_FOLDER" diff --git a/docker/server/local.Dockerfile b/docker/server/local.Dockerfile deleted file mode 100644 index 0d86c9ce45a..00000000000 --- a/docker/server/local.Dockerfile +++ /dev/null @@ -1,47 +0,0 @@ -# Since right now we can't set volumes to the docker during build, we split building container in stages: -# 1. build base container -# 2. run base conatiner with mounted volumes -# 3. commit container as image -# 4. build final container atop that image -# Middle steps are performed by the bash script. - -FROM ubuntu:18.04 as clickhouse-server-base -ARG gosu_ver=1.14 - -VOLUME /packages/ - -# update to allow installing dependencies of clickhouse automatically -RUN apt update; \ - DEBIAN_FRONTEND=noninteractive \ - apt install -y locales; - -ADD https://github.com/tianon/gosu/releases/download/${gosu_ver}/gosu-amd64 /bin/gosu - -RUN locale-gen en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 - -# installing via apt to simulate real-world scenario, where user installs deb package and all it's dependecies automatically. -CMD DEBIAN_FRONTEND=noninteractive \ - apt install -y \ - /packages/clickhouse-common-static_*.deb \ - /packages/clickhouse-server_*.deb ; - -FROM clickhouse-server-base:postinstall as clickhouse-server - -RUN mkdir /docker-entrypoint-initdb.d - -COPY docker_related_config.xml /etc/clickhouse-server/config.d/ -COPY entrypoint.sh /entrypoint.sh - -RUN chmod +x \ - /entrypoint.sh \ - /bin/gosu - -EXPOSE 9000 8123 9009 -VOLUME /var/lib/clickhouse - -ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml - -ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index bd1e0292636..079d2872204 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -267,6 +267,7 @@ function run_tests local test_opts=( --hung-check --fast-tests-only + --no-random-settings --no-long --testname --shard diff --git a/docker/test/fuzzer/generate-test-j2.py b/docker/test/fuzzer/generate-test-j2.py index bcc1bf6bc84..11525163ed8 100755 --- a/docker/test/fuzzer/generate-test-j2.py +++ b/docker/test/fuzzer/generate-test-j2.py @@ -11,7 +11,7 @@ def removesuffix(text, suffix): https://www.python.org/dev/peps/pep-0616/ """ if suffix and text.endswith(suffix): - return text[:-len(suffix)] + return text[: -len(suffix)] else: return text[:] diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index e18c07bf2c1..74711f476f8 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -13,7 +13,7 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" echo "$script_dir" repo_dir=ch BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-13_debug_none_bundled_unsplitted_disable_False_binary"} -BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} +BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} function clone { diff --git a/docker/test/integration/hive_server/http_api_server.py b/docker/test/integration/hive_server/http_api_server.py index 4818b785c89..8a9d3da4846 100644 --- 
a/docker/test/integration/hive_server/http_api_server.py +++ b/docker/test/integration/hive_server/http_api_server.py @@ -3,55 +3,55 @@ import subprocess import datetime from flask import Flask, flash, request, redirect, url_for + def run_command(command, wait=False): print("{} - execute shell command:{}".format(datetime.datetime.now(), command)) lines = [] - p = subprocess.Popen(command, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - shell=True) + p = subprocess.Popen( + command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True + ) if wait: - for l in iter(p.stdout.readline, b''): + for l in iter(p.stdout.readline, b""): lines.append(l) p.poll() return (lines, p.returncode) else: - return(iter(p.stdout.readline, b''), 0) + return (iter(p.stdout.readline, b""), 0) -UPLOAD_FOLDER = './' -ALLOWED_EXTENSIONS = {'txt', 'sh'} +UPLOAD_FOLDER = "./" +ALLOWED_EXTENSIONS = {"txt", "sh"} app = Flask(__name__) -app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER +app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER -@app.route('/') + +@app.route("/") def hello_world(): - return 'Hello World' + return "Hello World" def allowed_file(filename): - return '.' in filename and \ - filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS + return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS -@app.route('/upload', methods=['GET', 'POST']) +@app.route("/upload", methods=["GET", "POST"]) def upload_file(): - if request.method == 'POST': + if request.method == "POST": # check if the post request has the file part - if 'file' not in request.files: - flash('No file part') + if "file" not in request.files: + flash("No file part") return redirect(request.url) - file = request.files['file'] + file = request.files["file"] # If the user does not select a file, the browser submits an # empty file without a filename. - if file.filename == '': - flash('No selected file') + if file.filename == "": + flash("No selected file") return redirect(request.url) if file and allowed_file(file.filename): filename = file.filename - file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename)) - return redirect(url_for('upload_file', name=filename)) - return ''' + file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename)) + return redirect(url_for("upload_file", name=filename)) + return """ <!doctype html> <title>Upload new File</title> <h1>Upload new File</h1> <form method=post enctype=multipart/form-data> <input type=file name=file> <input type=submit value=Upload> </form>
@@ -59,12 +59,15 @@ def upload_file(): - ''' -@app.route('/run', methods=['GET', 'POST']) + """ + + +@app.route("/run", methods=["GET", "POST"]) def parse_request(): data = request.data # data is empty run_command(data, wait=True) - return 'Ok' + return "Ok" -if __name__ == '__main__': - app.run(port=5011) + +if __name__ == "__main__": + app.run(port=5011) diff --git a/docker/test/keeper-jepsen/run.sh b/docker/test/keeper-jepsen/run.sh index d7534270e2c..4dec82234bc 100644 --- a/docker/test/keeper-jepsen/run.sh +++ b/docker/test/keeper-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"} +CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} @@ -10,7 +10,7 @@ if [ -z "$CLICKHOUSE_REPO_PATH" ]; then CLICKHOUSE_REPO_PATH=ch rm -rf ch ||: mkdir ch ||: - wget -nv -nd -c "https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz" + wget -nv -nd -c "https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz" tar -C ch --strip-components=1 -xf clickhouse_no_subs.tar.gz ls -lath ||: fi diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 16ac304d7fb..cdfa080a475 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -1294,15 +1294,15 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv') select '' test_name, '$(sed -n 's/.*/\1/p' report.html)' test_status, 0 test_duration_ms, - 'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#fail1' report_url + 'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#fail1' report_url union all select test || ' #' || toString(query_index), 'slower' test_status, 0 test_duration_ms, - 'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#changes-in-performance.' + 'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#changes-in-performance.' || test || '.' || toString(query_index) report_url from queries where changed_fail != 0 and diff > 0 union all select test || ' #' || toString(query_index), 'unstable' test_status, 0 test_duration_ms, - 'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#unstable-queries.' + 'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#unstable-queries.' || test || '.' || toString(query_index) report_url from queries where unstable_fail != 0 ) @@ -1378,7 +1378,7 @@ $REF_SHA $SHA_TO_TEST $(numactl --hardware | sed -n 's/^available:[[:space:]]\+/ EOF # Also insert some data about the check into the CI checks table. 
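The comment above refers to compare.sh feeding ci-checks.tsv into the checks table through the clickhouse-client array "${client[@]}", as the hunk below shows. For orientation, an equivalent call shelled out from Python might look like this (the function name and table default are illustrative):

```python
import subprocess

def insert_checks(tsv_path: str, table: str = "default.checks") -> None:
    """Pipe a TSV report into ClickHouse, mirroring compare.sh's INSERT."""
    with open(tsv_path, "rb") as tsv:
        subprocess.run(
            ["clickhouse-client", "--query",
             f"INSERT INTO {table} FORMAT TSVWithNamesAndTypes"],
            stdin=tsv, check=True,
        )
```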
- "${client[@]}" --query "INSERT INTO "'"'"gh-data"'"'".checks FORMAT TSVWithNamesAndTypes" \ + "${client[@]}" --query "INSERT INTO "'"'"default"'"'".checks FORMAT TSVWithNamesAndTypes" \ < ci-checks.tsv set -x diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh index 8fa6eb5ec83..ae9e677713f 100755 --- a/docker/test/performance-comparison/download.sh +++ b/docker/test/performance-comparison/download.sh @@ -16,26 +16,17 @@ right_sha=$4 datasets=${CHPC_DATASETS-"hits1 hits10 hits100 values"} declare -A dataset_paths -if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then - dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar" - dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar" - dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar" - dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar" -else - dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar" - dataset_paths["hits100"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar" - dataset_paths["hits1"]="https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar" - dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_expressions/partitions/test_values.tar" -fi +dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar" +dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar" +dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar" +dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar" function download { # Historically there were various paths for the performance test package. # Test all of them. - declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz" - "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz" - ) + declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz") for path in "${urls_to_try[@]}" do diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 3d37a6c0e92..767807d008b 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -4,7 +4,7 @@ set -ex CHPC_CHECK_START_TIMESTAMP="$(date +%s)" export CHPC_CHECK_START_TIMESTAMP -S3_URL=${S3_URL:="https://clickhouse-builds.s3.yandex.net"} +S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"} COMMON_BUILD_PREFIX="/clickhouse_build_check" if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then @@ -64,9 +64,7 @@ function find_reference_sha # Historically there were various path for the performance test package, # test all of them. 
unset found - declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz" - "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/clickhouse_build_check/performance/performance.tgz" - ) + declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz") for path in "${urls_to_try[@]}" do if curl_with_retry "$path" diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 61987d34299..2266641397b 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -19,58 +19,126 @@ import xml.etree.ElementTree as et from threading import Thread from scipy import stats -logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING') +logging.basicConfig( + format="%(asctime)s: %(levelname)s: %(module)s: %(message)s", level="WARNING" +) total_start_seconds = time.perf_counter() stage_start_seconds = total_start_seconds + def reportStageEnd(stage): global stage_start_seconds, total_start_seconds current = time.perf_counter() - print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}') + print( + f"stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}" + ) stage_start_seconds = current def tsv_escape(s): - return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','') + return ( + s.replace("\\", "\\\\") + .replace("\t", "\\t") + .replace("\n", "\\n") + .replace("\r", "") + ) -parser = argparse.ArgumentParser(description='Run performance test.') +parser = argparse.ArgumentParser(description="Run performance test.") # Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set. -parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file') -parser.add_argument('--host', nargs='*', default=['localhost'], help="Space-separated list of server hostname(s). Corresponds to '--port' options.") -parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated list of server port(s). Corresponds to '--host' options.") -parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.') -parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.') -parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.') -parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.') -parser.add_argument('--prewarm-max-query-seconds', type=int, default=180, help='For how many seconds at most a prewarm (cold storage) query is allowed to run. 
The script finishes with error if this time is exceeded.') -parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.') -parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.') -parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.') -parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.') -parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.") -parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.") +parser.add_argument( + "file", + metavar="FILE", + type=argparse.FileType("r", encoding="utf-8"), + nargs=1, + help="test description file", +) +parser.add_argument( + "--host", + nargs="*", + default=["localhost"], + help="Space-separated list of server hostname(s). Corresponds to '--port' options.", +) +parser.add_argument( + "--port", + nargs="*", + default=[9000], + help="Space-separated list of server port(s). Corresponds to '--host' options.", +) +parser.add_argument( + "--runs", type=int, default=1, help="Number of query runs per server." +) +parser.add_argument( + "--max-queries", + type=int, + default=None, + help="Test no more than this number of queries, chosen at random.", +) +parser.add_argument( + "--queries-to-run", + nargs="*", + type=int, + default=None, + help="Space-separated list of indexes of queries to test.", +) +parser.add_argument( + "--max-query-seconds", + type=int, + default=15, + help="For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.", +) +parser.add_argument( + "--prewarm-max-query-seconds", + type=int, + default=180, + help="For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.", +) +parser.add_argument( + "--profile-seconds", + type=int, + default=0, + help="For how many seconds to profile a query for which the performance has changed.", +) +parser.add_argument( + "--long", action="store_true", help="Do not skip the tests tagged as long." +) +parser.add_argument( + "--print-queries", action="store_true", help="Print test queries and exit." +) +parser.add_argument( + "--print-settings", action="store_true", help="Print test settings and exit." +) +parser.add_argument( + "--keep-created-tables", + action="store_true", + help="Don't drop the created tables after the test.", +) +parser.add_argument( + "--use-existing-tables", + action="store_true", + help="Don't create or drop the tables, use the existing ones instead.", +) args = parser.parse_args() -reportStageEnd('start') +reportStageEnd("start") test_name = os.path.splitext(os.path.basename(args.file[0].name))[0] tree = et.parse(args.file[0]) root = tree.getroot() -reportStageEnd('parse') +reportStageEnd("parse") # Process query parameters -subst_elems = root.findall('substitutions/substitution') -available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... } +subst_elems = root.findall("substitutions/substitution") +available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... 
} for e in subst_elems: - name = e.find('name').text - values = [v.text for v in e.findall('values/value')] + name = e.find("name").text + values = [v.text for v in e.findall("values/value")] if not values: - raise Exception(f'No values given for substitution {{{name}}}') + raise Exception(f"No values given for substitution {{{name}}}") available_parameters[name] = values @@ -78,7 +146,7 @@ for e in subst_elems: # parameters. The set of parameters is determined based on the first list. # Note: keep the order of queries -- sometimes we have DROP IF EXISTS # followed by CREATE in create queries section, so the order matters. -def substitute_parameters(query_templates, other_templates = []): +def substitute_parameters(query_templates, other_templates=[]): query_results = [] other_results = [[]] * (len(other_templates)) for i, q in enumerate(query_templates): @@ -103,17 +171,21 @@ def substitute_parameters(query_templates, other_templates = []): # and reporting the queries marked as short. test_queries = [] is_short = [] -for e in root.findall('query'): - new_queries, [new_is_short] = substitute_parameters([e.text], [[e.attrib.get('short', '0')]]) +for e in root.findall("query"): + new_queries, [new_is_short] = substitute_parameters( + [e.text], [[e.attrib.get("short", "0")]] + ) test_queries += new_queries is_short += [eval(s) for s in new_is_short] -assert(len(test_queries) == len(is_short)) +assert len(test_queries) == len(is_short) # If we're given a list of queries to run, check that it makes sense. for i in args.queries_to_run or []: if i < 0 or i >= len(test_queries): - print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present') + print( + f"There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present" + ) exit(1) # If we're only asked to print the queries, do that and exit. @@ -125,60 +197,65 @@ if args.print_queries: # Print short queries for i, s in enumerate(is_short): if s: - print(f'short\t{i}') + print(f"short\t{i}") # If we're only asked to print the settings, do that and exit. These are settings # for clickhouse-benchmark, so we print them as command line arguments, e.g. # '--max_memory_usage=10000000'. if args.print_settings: - for s in root.findall('settings/*'): - print(f'--{s.tag}={s.text}') + for s in root.findall("settings/*"): + print(f"--{s.tag}={s.text}") exit(0) # Skip long tests if not args.long: - for tag in root.findall('.//tag'): - if tag.text == 'long': - print('skipped\tTest is tagged as long.') + for tag in root.findall(".//tag"): + if tag.text == "long": + print("skipped\tTest is tagged as long.") sys.exit(0) # Print report threshold for the test if it is set. ignored_relative_change = 0.05 -if 'max_ignored_relative_change' in root.attrib: +if "max_ignored_relative_change" in root.attrib: ignored_relative_change = float(root.attrib["max_ignored_relative_change"]) - print(f'report-threshold\t{ignored_relative_change}') + print(f"report-threshold\t{ignored_relative_change}") -reportStageEnd('before-connect') +reportStageEnd("before-connect") # Open connections -servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)] +servers = [ + {"host": host or args.host[0], "port": port or args.port[0]} + for (host, port) in itertools.zip_longest(args.host, args.port) +] # Force settings_is_important to fail queries on unknown settings. 
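Just above, perf.py pairs the --host and --port lists into per-server dicts before the connection hunk that follows. Since the lists may have different lengths, itertools.zip_longest pads the shorter one with None and the `or` fallback substitutes the first value; a standalone illustration (values invented):

```python
import itertools

hosts, ports = ["localhost"], [9000, 9001]
# zip_longest pads the shorter list with None; `or` swaps None for the default.
servers = [
    {"host": host or hosts[0], "port": port or ports[0]}
    for host, port in itertools.zip_longest(hosts, ports)
]
print(servers)
# [{'host': 'localhost', 'port': 9000}, {'host': 'localhost', 'port': 9001}]
```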
-all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers] +all_connections = [ + clickhouse_driver.Client(**server, settings_is_important=True) for server in servers +] for i, s in enumerate(servers): print(f'server\t{i}\t{s["host"]}\t{s["port"]}') -reportStageEnd('connect') +reportStageEnd("connect") if not args.use_existing_tables: # Run drop queries, ignoring errors. Do this before all other activity, # because clickhouse_driver disconnects on error (this is not configurable), # and the new connection loses the changes in settings. - drop_query_templates = [q.text for q in root.findall('drop_query')] + drop_query_templates = [q.text for q in root.findall("drop_query")] drop_queries = substitute_parameters(drop_query_templates) for conn_index, c in enumerate(all_connections): for q in drop_queries: try: c.execute(q) - print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') + print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}") except: pass - reportStageEnd('drop-1') + reportStageEnd("drop-1") # Apply settings. -settings = root.findall('settings/*') +settings = root.findall("settings/*") for conn_index, c in enumerate(all_connections): for s in settings: # requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings @@ -189,48 +266,52 @@ for conn_index, c in enumerate(all_connections): # the test, which is wrong. c.execute("select 1") -reportStageEnd('settings') +reportStageEnd("settings") # Check tables that should exist. If they don't exist, just skip this test. -tables = [e.text for e in root.findall('preconditions/table_exists')] +tables = [e.text for e in root.findall("preconditions/table_exists")] for t in tables: for c in all_connections: try: res = c.execute("select 1 from {} limit 1".format(t)) except: exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1] - skipped_message = ' '.join(exception_message.split('\n')[:2]) - print(f'skipped\t{tsv_escape(skipped_message)}') + skipped_message = " ".join(exception_message.split("\n")[:2]) + print(f"skipped\t{tsv_escape(skipped_message)}") sys.exit(0) -reportStageEnd('preconditions') +reportStageEnd("preconditions") if not args.use_existing_tables: # Run create and fill queries. We will run them simultaneously for both # servers, to save time. The weird XML search + filter is because we want to # keep the relative order of elements, and etree doesn't support the # appropriate xpath query. - create_query_templates = [q.text for q in root.findall('./*') - if q.tag in ('create_query', 'fill_query')] + create_query_templates = [ + q.text for q in root.findall("./*") if q.tag in ("create_query", "fill_query") + ] create_queries = substitute_parameters(create_query_templates) # Disallow temporary tables, because the clickhouse_driver reconnects on # errors, and temporary tables are destroyed. We want to be able to continue # after some errors. 
for q in create_queries: - if re.search('create temporary table', q, flags=re.IGNORECASE): - print(f"Temporary tables are not allowed in performance tests: '{q}'", - file = sys.stderr) + if re.search("create temporary table", q, flags=re.IGNORECASE): + print( + f"Temporary tables are not allowed in performance tests: '{q}'", + file=sys.stderr, + ) sys.exit(1) def do_create(connection, index, queries): for q in queries: connection.execute(q) - print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}') + print(f"create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}") threads = [ - Thread(target = do_create, args = (connection, index, create_queries)) - for index, connection in enumerate(all_connections)] + Thread(target=do_create, args=(connection, index, create_queries)) + for index, connection in enumerate(all_connections) + ] for t in threads: t.start() @@ -238,14 +319,16 @@ if not args.use_existing_tables: for t in threads: t.join() - reportStageEnd('create') + reportStageEnd("create") # By default, test all queries. queries_to_run = range(0, len(test_queries)) if args.max_queries: # If specified, test a limited number of queries chosen at random. - queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries)) + queries_to_run = random.sample( + range(0, len(test_queries)), min(len(test_queries), args.max_queries) + ) if args.queries_to_run: # Run the specified queries. @@ -255,16 +338,16 @@ if args.queries_to_run: profile_total_seconds = 0 for query_index in queries_to_run: q = test_queries[query_index] - query_prefix = f'{test_name}.query{query_index}' + query_prefix = f"{test_name}.query{query_index}" # We have some crazy long queries (about 100kB), so trim them to a sane # length. This means we can't use query text as an identifier and have to # use the test name + the test-wide query index. query_display_name = q if len(query_display_name) > 1000: - query_display_name = f'{query_display_name[:1000]}...({query_index})' + query_display_name = f"{query_display_name[:1000]}...({query_index})" - print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}') + print(f"display-name\t{query_index}\t{tsv_escape(query_display_name)}") # Prewarm: run once on both servers. Helps to bring the data into memory, # precompile the queries, etc. @@ -272,10 +355,10 @@ for query_index in queries_to_run: # new one. We want to run them on the new server only, so that the PR author # can ensure that the test works properly. Remember the errors we had on # each server. - query_error_on_connection = [None] * len(all_connections); + query_error_on_connection = [None] * len(all_connections) for conn_index, c in enumerate(all_connections): try: - prewarm_id = f'{query_prefix}.prewarm0' + prewarm_id = f"{query_prefix}.prewarm0" try: # During the warmup runs, we will also: @@ -283,25 +366,30 @@ for query_index in queries_to_run: # * collect profiler traces, which might be helpful for analyzing # test coverage. We disable profiler for normal runs because # it makes the results unstable. 
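Note how the profiler is switched on per query through settings rather than globally on the server; that is what lets the normal measurement runs stay clean while the warmup runs still collect traces. In isolation the knob looks like this (endpoint, query, and time limit are illustrative; the profiler period of 10000000 ns, i.e. one sample every 10 ms, matches the value used below):

    import clickhouse_driver

    client = clickhouse_driver.Client(host="localhost", port=9000)

    # Sample the query's threads every 10 ms of wall-clock time and record
    # an allocation stack roughly every 4 MiB of memory growth; both
    # settings apply to this one query only.
    client.execute(
        "SELECT count() FROM system.numbers LIMIT 10000000",
        query_id="example.prewarm0",
        settings={
            "max_execution_time": 10,
            "query_profiler_real_time_period_ns": 10_000_000,
            "memory_profiler_step": "4Mi",
        },
    )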
- res = c.execute(q, query_id = prewarm_id, - settings = { - 'max_execution_time': args.prewarm_max_query_seconds, - 'query_profiler_real_time_period_ns': 10000000, - 'memory_profiler_step': '4Mi', - }) + res = c.execute( + q, + query_id=prewarm_id, + settings={ + "max_execution_time": args.prewarm_max_query_seconds, + "query_profiler_real_time_period_ns": 10000000, + "memory_profiler_step": "4Mi", + }, + ) except clickhouse_driver.errors.Error as e: # Add query id to the exception to make debugging easier. e.args = (prewarm_id, *e.args) - e.message = prewarm_id + ': ' + e.message + e.message = prewarm_id + ": " + e.message raise - print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}') + print( + f"prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}" + ) except KeyboardInterrupt: raise except: # FIXME the driver reconnects on error and we lose settings, so this # might lead to further errors or unexpected behavior. - query_error_on_connection[conn_index] = traceback.format_exc(); + query_error_on_connection[conn_index] = traceback.format_exc() continue # Report all errors that ocurred during prewarm and decide what to do next. @@ -311,14 +399,14 @@ for query_index in queries_to_run: no_errors = [] for i, e in enumerate(query_error_on_connection): if e: - print(e, file = sys.stderr) + print(e, file=sys.stderr) else: no_errors.append(i) if len(no_errors) == 0: continue elif len(no_errors) < len(all_connections): - print(f'partial\t{query_index}\t{no_errors}') + print(f"partial\t{query_index}\t{no_errors}") this_query_connections = [all_connections[index] for index in no_errors] @@ -337,27 +425,34 @@ for query_index in queries_to_run: all_server_times.append([]) while True: - run_id = f'{query_prefix}.run{run}' + run_id = f"{query_prefix}.run{run}" for conn_index, c in enumerate(this_query_connections): try: - res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds}) + res = c.execute( + q, + query_id=run_id, + settings={"max_execution_time": args.max_query_seconds}, + ) except clickhouse_driver.errors.Error as e: # Add query id to the exception to make debugging easier. e.args = (run_id, *e.args) - e.message = run_id + ': ' + e.message + e.message = run_id + ": " + e.message raise elapsed = c.last_query.elapsed all_server_times[conn_index].append(elapsed) server_seconds += elapsed - print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}') + print(f"query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}") if elapsed > args.max_query_seconds: # Do not stop processing pathologically slow queries, # since this may hide errors in other queries. - print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr) + print( + f"The query no. {query_index} is taking too long to run ({elapsed} s)", + file=sys.stderr, + ) # Be careful with the counter, after this line it's the next iteration # already. @@ -386,7 +481,7 @@ for query_index in queries_to_run: break client_seconds = time.perf_counter() - start_seconds - print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}') + print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}") # Run additional profiling queries to collect profile data, but only if test times appeared to be different. 
# We have to do it after normal runs because otherwise it will affect test statistics too much @@ -397,13 +492,15 @@ for query_index in queries_to_run: # Don't fail if for some reason there are not enough measurements. continue - pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue + pvalue = stats.ttest_ind( + all_server_times[0], all_server_times[1], equal_var=False + ).pvalue median = [statistics.median(t) for t in all_server_times] # Keep this consistent with the value used in report. Should eventually move # to (median[1] - median[0]) / min(median), which is compatible with "times" # difference we use in report (max(median) / min(median)). relative_diff = (median[1] - median[0]) / median[0] - print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}') + print(f"diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}") if abs(relative_diff) < ignored_relative_change or pvalue > 0.05: continue @@ -412,25 +509,31 @@ for query_index in queries_to_run: profile_start_seconds = time.perf_counter() run = 0 while time.perf_counter() - profile_start_seconds < args.profile_seconds: - run_id = f'{query_prefix}.profile{run}' + run_id = f"{query_prefix}.profile{run}" for conn_index, c in enumerate(this_query_connections): try: - res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000}) - print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}') + res = c.execute( + q, + query_id=run_id, + settings={"query_profiler_real_time_period_ns": 10000000}, + ) + print( + f"profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}" + ) except clickhouse_driver.errors.Error as e: # Add query id to the exception to make debugging easier. 
e.args = (run_id, *e.args) - e.message = run_id + ': ' + e.message + e.message = run_id + ": " + e.message raise run += 1 profile_total_seconds += time.perf_counter() - profile_start_seconds -print(f'profile-total\t{profile_total_seconds}') +print(f"profile-total\t{profile_total_seconds}") -reportStageEnd('run') +reportStageEnd("run") # Run drop queries if not args.keep_created_tables and not args.use_existing_tables: @@ -438,6 +541,6 @@ if not args.keep_created_tables and not args.use_existing_tables: for conn_index, c in enumerate(all_connections): for q in drop_queries: c.execute(q) - print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') + print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}") - reportStageEnd('drop-2') + reportStageEnd("drop-2") diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 4cff6b41949..0cb8481ee6e 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -12,9 +12,13 @@ import pprint import sys import traceback -parser = argparse.ArgumentParser(description='Create performance test report') -parser.add_argument('--report', default='main', choices=['main', 'all-queries'], - help='Which report to build') +parser = argparse.ArgumentParser(description="Create performance test report") +parser.add_argument( + "--report", + default="main", + choices=["main", "all-queries"], + help="Which report to build", +) args = parser.parse_args() tables = [] @@ -31,8 +35,8 @@ unstable_partial_queries = 0 # max seconds to run one query by itself, not counting preparation allowed_single_run_time = 2 -color_bad='#ffb0c0' -color_good='#b0d050' +color_bad = "#ffb0c0" +color_good = "#b0d050" header_template = """ @@ -151,24 +155,29 @@ tr:nth-child(odd) td {{filter: brightness(90%);}} table_anchor = 0 row_anchor = 0 + def currentTableAnchor(): global table_anchor - return f'{table_anchor}' + return f"{table_anchor}" + def newTableAnchor(): global table_anchor table_anchor += 1 return currentTableAnchor() + def currentRowAnchor(): global row_anchor global table_anchor - return f'{table_anchor}.{row_anchor}' + return f"{table_anchor}.{row_anchor}" + def nextRowAnchor(): global row_anchor global table_anchor - return f'{table_anchor}.{row_anchor + 1}' + return f"{table_anchor}.{row_anchor + 1}" + def advanceRowAnchor(): global row_anchor @@ -178,43 +187,58 @@ def advanceRowAnchor(): def tr(x, anchor=None): - #return '{x}'.format(a=a, x=str(x)) + # return '{x}'.format(a=a, x=str(x)) anchor = anchor if anchor else advanceRowAnchor() - return f'{x}' + return f"{x}" -def td(value, cell_attributes = ''): - return '{value}'.format( - cell_attributes = cell_attributes, - value = value) -def th(value, cell_attributes = ''): - return '{value}'.format( - cell_attributes = cell_attributes, - value = value) +def td(value, cell_attributes=""): + return "{value}".format( + cell_attributes=cell_attributes, value=value + ) -def tableRow(cell_values, cell_attributes = [], anchor=None): + +def th(value, cell_attributes=""): + return "{value}".format( + cell_attributes=cell_attributes, value=value + ) + + +def tableRow(cell_values, cell_attributes=[], anchor=None): return tr( - ''.join([td(v, a) - for v, a in itertools.zip_longest( - cell_values, cell_attributes, - fillvalue = '') - if a is not None and v is not None]), - anchor) + "".join( + [ + td(v, a) + for v, a in itertools.zip_longest( + cell_values, cell_attributes, fillvalue="" + ) + if a is not None and v is 
not None + ] + ), + anchor, + ) -def tableHeader(cell_values, cell_attributes = []): + +def tableHeader(cell_values, cell_attributes=[]): return tr( - ''.join([th(v, a) - for v, a in itertools.zip_longest( - cell_values, cell_attributes, - fillvalue = '') - if a is not None and v is not None])) + "".join( + [ + th(v, a) + for v, a in itertools.zip_longest( + cell_values, cell_attributes, fillvalue="" + ) + if a is not None and v is not None + ] + ) + ) + def tableStart(title): - cls = '-'.join(title.lower().split(' ')[:3]); + cls = "-".join(title.lower().split(" ")[:3]) global table_anchor table_anchor = cls anchor = currentTableAnchor() - help_anchor = '-'.join(title.lower().split(' ')); + help_anchor = "-".join(title.lower().split(" ")) return f"""

{title}
@@ -223,12 +247,14 @@
 """
' + return "" + def tsvRows(n): try: - with open(n, encoding='utf-8') as fd: + with open(n, encoding="utf-8") as fd: result = [] for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE): new_row = [] @@ -237,27 +263,32 @@ def tsvRows(n): # The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so # 'Чем зÐ�нимаеÑ�ЬÑ�Ñ�' is transformed back into 'Чем зАнимаешЬся'. - new_row.append(e.encode('utf-8').decode('unicode-escape').encode('latin1').decode('utf-8')) + new_row.append( + e.encode("utf-8") + .decode("unicode-escape") + .encode("latin1") + .decode("utf-8") + ) result.append(new_row) return result except: - report_errors.append( - traceback.format_exception_only( - *sys.exc_info()[:2])[-1]) + report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1]) pass return [] + def htmlRows(n): rawRows = tsvRows(n) - result = '' + result = "" for row in rawRows: result += tableRow(row) return result + def addSimpleTable(caption, columns, rows, pos=None): global tables - text = '' + text = "" if not rows: return @@ -268,51 +299,63 @@ def addSimpleTable(caption, columns, rows, pos=None): text += tableEnd() tables.insert(pos if pos else len(tables), text) + def add_tested_commits(): global report_errors try: - addSimpleTable('Tested Commits', ['Old', 'New'], - [['
<pre>{}</pre>
'.format(x) for x in - [open('left-commit.txt').read(), - open('right-commit.txt').read()]]]) + addSimpleTable( + "Tested Commits", + ["Old", "New"], + [ + [ + "
<pre>{}</pre>
".format(x) + for x in [ + open("left-commit.txt").read(), + open("right-commit.txt").read(), + ] + ] + ], + ) except: # Don't fail if no commit info -- maybe it's a manual run. - report_errors.append( - traceback.format_exception_only( - *sys.exc_info()[:2])[-1]) + report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1]) pass + def add_report_errors(): global tables global report_errors # Add the errors reported by various steps of comparison script try: - report_errors += [l.strip() for l in open('report/errors.log')] + report_errors += [l.strip() for l in open("report/errors.log")] except: - report_errors.append( - traceback.format_exception_only( - *sys.exc_info()[:2])[-1]) + report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1]) pass if not report_errors: return - text = tableStart('Errors while Building the Report') - text += tableHeader(['Error']) + text = tableStart("Errors while Building the Report") + text += tableHeader(["Error"]) for x in report_errors: text += tableRow([x]) text += tableEnd() # Insert after Tested Commits tables.insert(1, text) - errors_explained.append([f'There were some errors while building the report']); + errors_explained.append( + [ + f'There were some errors while building the report' + ] + ) + def add_errors_explained(): if not errors_explained: return text = '' - text += tableStart('Error Summary') - text += tableHeader(['Description']) + text += tableStart("Error Summary") + text += tableHeader(["Description"]) for row in errors_explained: text += tableRow(row) text += tableEnd() @@ -321,59 +364,81 @@ def add_errors_explained(): tables.insert(1, text) -if args.report == 'main': +if args.report == "main": print((header_template.format())) add_tested_commits() - - run_error_rows = tsvRows('run-errors.tsv') + run_error_rows = tsvRows("run-errors.tsv") error_tests += len(run_error_rows) - addSimpleTable('Run Errors', ['Test', 'Error'], run_error_rows) + addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows) if run_error_rows: - errors_explained.append([f'There were some errors while running the tests']); + errors_explained.append( + [ + f'There were some errors while running the tests' + ] + ) - - slow_on_client_rows = tsvRows('report/slow-on-client.tsv') + slow_on_client_rows = tsvRows("report/slow-on-client.tsv") error_tests += len(slow_on_client_rows) - addSimpleTable('Slow on Client', - ['Client time, s', 'Server time, s', 'Ratio', 'Test', 'Query'], - slow_on_client_rows) + addSimpleTable( + "Slow on Client", + ["Client time, s", "Server time, s", "Ratio", "Test", "Query"], + slow_on_client_rows, + ) if slow_on_client_rows: - errors_explained.append([f'Some queries are taking noticeable time client-side (missing `FORMAT Null`?)']); + errors_explained.append( + [ + f'Some queries are taking noticeable time client-side (missing `FORMAT Null`?)' + ] + ) - unmarked_short_rows = tsvRows('report/unexpected-query-duration.tsv') + unmarked_short_rows = tsvRows("report/unexpected-query-duration.tsv") error_tests += len(unmarked_short_rows) - addSimpleTable('Unexpected Query Duration', - ['Problem', 'Marked as "short"?', 'Run time, s', 'Test', '#', 'Query'], - unmarked_short_rows) + addSimpleTable( + "Unexpected Query Duration", + ["Problem", 'Marked as "short"?', "Run time, s", "Test", "#", "Query"], + unmarked_short_rows, + ) if unmarked_short_rows: - errors_explained.append([f'Some queries have unexpected duration']); + errors_explained.append( + [ + f'Some queries have unexpected duration' + ] + ) def 
add_partial(): - rows = tsvRows('report/partial-queries-report.tsv') + rows = tsvRows("report/partial-queries-report.tsv") if not rows: return global unstable_partial_queries, slow_average_tests, tables - text = tableStart('Partial Queries') - columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query'] + text = tableStart("Partial Queries") + columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"] text += tableHeader(columns) - attrs = ['' for c in columns] + attrs = ["" for c in columns] for row in rows: - anchor = f'{currentTableAnchor()}.{row[2]}.{row[3]}' + anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}" if float(row[1]) > 0.10: attrs[1] = f'style="background: {color_bad}"' unstable_partial_queries += 1 - errors_explained.append([f'The query no. {row[3]} of test \'{row[2]}\' has excessive variance of run time. Keep it below 10%']) + errors_explained.append( + [ + f"The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%" + ] + ) else: - attrs[1] = '' + attrs[1] = "" if float(row[0]) > allowed_single_run_time: attrs[0] = f'style="background: {color_bad}"' - errors_explained.append([f'The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"']) + errors_explained.append( + [ + f'The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"' + ] + ) slow_average_tests += 1 else: - attrs[0] = '' + attrs[0] = "" text += tableRow(row, attrs, anchor) text += tableEnd() tables.append(text) @@ -381,41 +446,45 @@ if args.report == 'main': add_partial() def add_changes(): - rows = tsvRows('report/changed-perf.tsv') + rows = tsvRows("report/changed-perf.tsv") if not rows: return global faster_queries, slower_queries, tables - text = tableStart('Changes in Performance') + text = tableStart("Changes in Performance") columns = [ - 'Old, s', # 0 - 'New, s', # 1 - 'Ratio of speedup (-) or slowdown (+)', # 2 - 'Relative difference (new − old) / old', # 3 - 'p < 0.01 threshold', # 4 - '', # Failed # 5 - 'Test', # 6 - '#', # 7 - 'Query', # 8 - ] - attrs = ['' for c in columns] + "Old, s", # 0 + "New, s", # 1 + "Ratio of speedup (-) or slowdown (+)", # 2 + "Relative difference (new − old) / old", # 3 + "p < 0.01 threshold", # 4 + "", # Failed # 5 + "Test", # 6 + "#", # 7 + "Query", # 8 + ] + attrs = ["" for c in columns] attrs[5] = None text += tableHeader(columns, attrs) for row in rows: - anchor = f'{currentTableAnchor()}.{row[6]}.{row[7]}' + anchor = f"{currentTableAnchor()}.{row[6]}.{row[7]}" if int(row[5]): - if float(row[3]) < 0.: + if float(row[3]) < 0.0: faster_queries += 1 attrs[2] = attrs[3] = f'style="background: {color_good}"' else: slower_queries += 1 attrs[2] = attrs[3] = f'style="background: {color_bad}"' - errors_explained.append([f'The query no. {row[7]} of test \'{row[6]}\' has slowed down']) + errors_explained.append( + [ + f"The query no. 
{row[7]} of test '{row[6]}' has slowed down" + ] + ) else: - attrs[2] = attrs[3] = '' + attrs[2] = attrs[3] = "" text += tableRow(row, attrs, anchor) @@ -427,35 +496,35 @@ if args.report == 'main': def add_unstable_queries(): global unstable_queries, very_unstable_queries, tables - unstable_rows = tsvRows('report/unstable-queries.tsv') + unstable_rows = tsvRows("report/unstable-queries.tsv") if not unstable_rows: return unstable_queries += len(unstable_rows) columns = [ - 'Old, s', #0 - 'New, s', #1 - 'Relative difference (new - old)/old', #2 - 'p < 0.01 threshold', #3 - '', # Failed #4 - 'Test', #5 - '#', #6 - 'Query' #7 + "Old, s", # 0 + "New, s", # 1 + "Relative difference (new - old)/old", # 2 + "p < 0.01 threshold", # 3 + "", # Failed #4 + "Test", # 5 + "#", # 6 + "Query", # 7 ] - attrs = ['' for c in columns] + attrs = ["" for c in columns] attrs[4] = None - text = tableStart('Unstable Queries') + text = tableStart("Unstable Queries") text += tableHeader(columns, attrs) for r in unstable_rows: - anchor = f'{currentTableAnchor()}.{r[5]}.{r[6]}' + anchor = f"{currentTableAnchor()}.{r[5]}.{r[6]}" if int(r[4]): very_unstable_queries += 1 attrs[3] = f'style="background: {color_bad}"' else: - attrs[3] = '' + attrs[3] = "" # Just don't add the slightly unstable queries we don't consider # errors. It's not clear what the user should do with them. continue @@ -470,53 +539,70 @@ if args.report == 'main': add_unstable_queries() - skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv') - addSimpleTable('Skipped Tests', ['Test', 'Reason'], skipped_tests_rows) + skipped_tests_rows = tsvRows("analyze/skipped-tests.tsv") + addSimpleTable("Skipped Tests", ["Test", "Reason"], skipped_tests_rows) - addSimpleTable('Test Performance Changes', - ['Test', 'Ratio of speedup (-) or slowdown (+)', 'Queries', 'Total not OK', 'Changed perf', 'Unstable'], - tsvRows('report/test-perf-changes.tsv')) + addSimpleTable( + "Test Performance Changes", + [ + "Test", + "Ratio of speedup (-) or slowdown (+)", + "Queries", + "Total not OK", + "Changed perf", + "Unstable", + ], + tsvRows("report/test-perf-changes.tsv"), + ) def add_test_times(): global slow_average_tests, tables - rows = tsvRows('report/test-times.tsv') + rows = tsvRows("report/test-times.tsv") if not rows: return columns = [ - 'Test', #0 - 'Wall clock time, entire test, s', #1 - 'Total client time for measured query runs, s', #2 - 'Queries', #3 - 'Longest query, total for measured runs, s', #4 - 'Wall clock time per query, s', #5 - 'Shortest query, total for measured runs, s', #6 - '', # Runs #7 - ] - attrs = ['' for c in columns] + "Test", # 0 + "Wall clock time, entire test, s", # 1 + "Total client time for measured query runs, s", # 2 + "Queries", # 3 + "Longest query, total for measured runs, s", # 4 + "Wall clock time per query, s", # 5 + "Shortest query, total for measured runs, s", # 6 + "", # Runs #7 + ] + attrs = ["" for c in columns] attrs[7] = None - text = tableStart('Test Times') + text = tableStart("Test Times") text += tableHeader(columns, attrs) - allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs + allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs for r in rows: - anchor = f'{currentTableAnchor()}.{r[0]}' + anchor = f"{currentTableAnchor()}.{r[0]}" total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers - if r[0] != 'Total' and float(r[5]) > allowed_average_run_time * total_runs: + if r[0] != "Total" and float(r[5]) > allowed_average_run_time * total_runs: # FIXME should be 15s max 
-- investigate parallel_insert slow_average_tests += 1 attrs[5] = f'style="background: {color_bad}"' - errors_explained.append([f'The test \'{r[0]}\' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up']) + errors_explained.append( + [ + f"The test '{r[0]}' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up" + ] + ) else: - attrs[5] = '' + attrs[5] = "" - if r[0] != 'Total' and float(r[4]) > allowed_single_run_time * total_runs: + if r[0] != "Total" and float(r[4]) > allowed_single_run_time * total_runs: slow_average_tests += 1 attrs[4] = f'style="background: {color_bad}"' - errors_explained.append([f'Some query of the test \'{r[0]}\' is too slow to run. See the all queries report']) + errors_explained.append( + [ + f"Some query of the test '{r[0]}' is too slow to run. See the all queries report" + ] + ) else: - attrs[4] = '' + attrs[4] = "" text += tableRow(r, attrs, anchor) @@ -525,10 +611,17 @@ if args.report == 'main': add_test_times() - addSimpleTable('Metric Changes', - ['Metric', 'Old median value', 'New median value', - 'Relative difference', 'Times difference'], - tsvRows('metrics/changes.tsv')) + addSimpleTable( + "Metric Changes", + [ + "Metric", + "Old median value", + "New median value", + "Relative difference", + "Times difference", + ], + tsvRows("metrics/changes.tsv"), + ) add_report_errors() add_errors_explained() @@ -536,7 +629,8 @@ if args.report == 'main': for t in tables: print(t) - print(f""" + print( + f""" - """) + """ + ) - status = 'success' - message = 'See the report' + status = "success" + message = "See the report" message_array = [] if slow_average_tests: - status = 'failure' - message_array.append(str(slow_average_tests) + ' too long') + status = "failure" + message_array.append(str(slow_average_tests) + " too long") if faster_queries: - message_array.append(str(faster_queries) + ' faster') + message_array.append(str(faster_queries) + " faster") if slower_queries: if slower_queries > 3: - status = 'failure' - message_array.append(str(slower_queries) + ' slower') + status = "failure" + message_array.append(str(slower_queries) + " slower") if unstable_partial_queries: very_unstable_queries += unstable_partial_queries - status = 'failure' + status = "failure" # Don't show mildly unstable queries, only the very unstable ones we # treat as errors. if very_unstable_queries: if very_unstable_queries > 5: error_tests += very_unstable_queries - status = 'failure' - message_array.append(str(very_unstable_queries) + ' unstable') + status = "failure" + message_array.append(str(very_unstable_queries) + " unstable") error_tests += slow_average_tests if error_tests: - status = 'failure' - message_array.insert(0, str(error_tests) + ' errors') + status = "failure" + message_array.insert(0, str(error_tests) + " errors") if message_array: - message = ', '.join(message_array) + message = ", ".join(message_array) if report_errors: - status = 'failure' - message = 'Errors while building the report.' + status = "failure" + message = "Errors while building the report." 
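The `p < 0.01 threshold` columns above and the changed/unstable classifications all trace back to one statistic computed in perf.py: a two-sample Welch test over the per-server timings (`stats.ttest_ind(..., equal_var=False)`), gated by the relative difference of the medians. A self-contained sketch of that decision rule with made-up timings; the thresholds mirror the defaults seen earlier (0.05 relative change, p <= 0.05):

    import statistics

    from scipy import stats

    # Hypothetical timings for one query, in seconds, one list per server.
    old_server = [0.101, 0.098, 0.103, 0.099, 0.102, 0.100, 0.101]
    new_server = [0.121, 0.118, 0.122, 0.119, 0.120, 0.121, 0.123]

    # Welch's t-test: unlike the plain t-test, it does not assume the two
    # servers have equal timing variance.
    pvalue = stats.ttest_ind(old_server, new_server, equal_var=False).pvalue

    median = [statistics.median(old_server), statistics.median(new_server)]
    relative_diff = (median[1] - median[0]) / median[0]

    # Flag the query only when the change is both large enough to matter
    # and statistically significant -- the same shape of rule as perf.py.
    if abs(relative_diff) >= 0.05 and pvalue <= 0.05:
        print(f"changed: {relative_diff:+.1%} (p={pvalue:.2g})")

Requiring both conditions is deliberate: a tiny but perfectly reproducible difference should not fail the check, and neither should a large difference that the noise cannot distinguish from chance.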
- print((""" + print( + ( + """ - """.format(status=status, message=message))) + """.format( + status=status, message=message + ) + ) + ) -elif args.report == 'all-queries': +elif args.report == "all-queries": print((header_template.format())) add_tested_commits() def add_all_queries(): - rows = tsvRows('report/all-queries.tsv') + rows = tsvRows("report/all-queries.tsv") if not rows: return columns = [ - '', # Changed #0 - '', # Unstable #1 - 'Old, s', #2 - 'New, s', #3 - 'Ratio of speedup (-) or slowdown (+)', #4 - 'Relative difference (new − old) / old', #5 - 'p < 0.01 threshold', #6 - 'Test', #7 - '#', #8 - 'Query', #9 - ] - attrs = ['' for c in columns] + "", # Changed #0 + "", # Unstable #1 + "Old, s", # 2 + "New, s", # 3 + "Ratio of speedup (-) or slowdown (+)", # 4 + "Relative difference (new − old) / old", # 5 + "p < 0.01 threshold", # 6 + "Test", # 7 + "#", # 8 + "Query", # 9 + ] + attrs = ["" for c in columns] attrs[0] = None attrs[1] = None - text = tableStart('All Query Times') + text = tableStart("All Query Times") text += tableHeader(columns, attrs) for r in rows: - anchor = f'{currentTableAnchor()}.{r[7]}.{r[8]}' + anchor = f"{currentTableAnchor()}.{r[7]}.{r[8]}" if int(r[1]): attrs[6] = f'style="background: {color_bad}"' else: - attrs[6] = '' + attrs[6] = "" if int(r[0]): - if float(r[5]) > 0.: + if float(r[5]) > 0.0: attrs[4] = attrs[5] = f'style="background: {color_bad}"' else: attrs[4] = attrs[5] = f'style="background: {color_good}"' else: - attrs[4] = attrs[5] = '' + attrs[4] = attrs[5] = "" if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time: attrs[2] = f'style="background: {color_bad}"' attrs[3] = f'style="background: {color_bad}"' else: - attrs[2] = '' - attrs[3] = '' + attrs[2] = "" + attrs[3] = "" text += tableRow(r, attrs, anchor) @@ -655,7 +756,8 @@ elif args.report == 'all-queries': for t in tables: print(t) - print(f""" + print( + f""" - """) + """ + ) diff --git a/docker/test/split_build_smoke_test/process_split_build_smoke_test_result.py b/docker/test/split_build_smoke_test/process_split_build_smoke_test_result.py index 58d6ba8c62a..b5bc82e6818 100755 --- a/docker/test/split_build_smoke_test/process_split_build_smoke_test_result.py +++ b/docker/test/split_build_smoke_test/process_split_build_smoke_test_result.py @@ -7,18 +7,19 @@ import csv RESULT_LOG_NAME = "run.log" + def process_result(result_folder): status = "success" - description = 'Server started and responded' + description = "Server started and responded" summary = [("Smoke test", "OK")] - with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log: - lines = run_log.read().split('\n') - if not lines or lines[0].strip() != 'OK': + with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log: + lines = run_log.read().split("\n") + if not lines or lines[0].strip() != "OK": status = "failure" - logging.info("Lines is not ok: %s", str('\n'.join(lines))) + logging.info("Lines is not ok: %s", str("\n".join(lines))) summary = [("Smoke test", "FAIL")] - description = 'Server failed to respond, see result in logs' + description = "Server failed to respond, see result in logs" result_logs = [] server_log_path = os.path.join(result_folder, "clickhouse-server.log") @@ -38,20 +39,22 @@ def process_result(result_folder): def write_results(results_file, status_file, results, status): - with open(results_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(results_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, 'w') 
as f: - out = csv.writer(f, delimiter='\t') + with open(status_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerow(status) if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of split build smoke test") - parser.add_argument("--in-results-dir", default='/test_output/') - parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') - parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + parser = argparse.ArgumentParser( + description="ClickHouse script for parsing results of split build smoke test" + ) + parser.add_argument("--in-results-dir", default="/test_output/") + parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") + parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") args = parser.parse_args() state, description, test_results, logs = process_result(args.in_results_dir) diff --git a/docker/test/sqlancer/process_sqlancer_result.py b/docker/test/sqlancer/process_sqlancer_result.py index ede3cabc1c5..37b8f465498 100755 --- a/docker/test/sqlancer/process_sqlancer_result.py +++ b/docker/test/sqlancer/process_sqlancer_result.py @@ -10,11 +10,18 @@ def process_result(result_folder): status = "success" summary = [] paths = [] - tests = ["TLPWhere", "TLPGroupBy", "TLPHaving", "TLPWhereGroupBy", "TLPDistinct", "TLPAggregate"] + tests = [ + "TLPWhere", + "TLPGroupBy", + "TLPHaving", + "TLPWhereGroupBy", + "TLPDistinct", + "TLPAggregate", + ] for test in tests: - err_path = '{}/{}.err'.format(result_folder, test) - out_path = '{}/{}.out'.format(result_folder, test) + err_path = "{}/{}.err".format(result_folder, test) + out_path = "{}/{}.out".format(result_folder, test) if not os.path.exists(err_path): logging.info("No output err on path %s", err_path) summary.append((test, "SKIPPED")) @@ -23,24 +30,24 @@ def process_result(result_folder): else: paths.append(err_path) paths.append(out_path) - with open(err_path, 'r') as f: - if 'AssertionError' in f.read(): + with open(err_path, "r") as f: + if "AssertionError" in f.read(): summary.append((test, "FAIL")) - status = 'failure' + status = "failure" else: summary.append((test, "OK")) - logs_path = '{}/logs.tar.gz'.format(result_folder) + logs_path = "{}/logs.tar.gz".format(result_folder) if not os.path.exists(logs_path): logging.info("No logs tar on path %s", logs_path) else: paths.append(logs_path) - stdout_path = '{}/stdout.log'.format(result_folder) + stdout_path = "{}/stdout.log".format(result_folder) if not os.path.exists(stdout_path): logging.info("No stdout log on path %s", stdout_path) else: paths.append(stdout_path) - stderr_path = '{}/stderr.log'.format(result_folder) + stderr_path = "{}/stderr.log".format(result_folder) if not os.path.exists(stderr_path): logging.info("No stderr log on path %s", stderr_path) else: @@ -52,20 +59,22 @@ def process_result(result_folder): def write_results(results_file, status_file, results, status): - with open(results_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(results_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(status_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerow(status) if __name__ == "__main__": - 
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of sqlancer test") - parser.add_argument("--in-results-dir", default='/test_output/') - parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') - parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + parser = argparse.ArgumentParser( + description="ClickHouse script for parsing results of sqlancer test" + ) + parser.add_argument("--in-results-dir", default="/test_output/") + parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") + parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") args = parser.parse_args() state, description, test_results, logs = process_result(args.in_results_dir) diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 93e7cebb857..543cf113b2b 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -11,7 +11,7 @@ RUN apt-get update -y \ COPY s3downloader /s3downloader -ENV S3_URL="https://clickhouse-datasets.s3.yandex.net" +ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com" ENV DATASETS="hits visits" ENV EXPORT_S3_STORAGE_POLICIES=1 diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 77dc61e6cd0..861e17848a4 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -115,7 +115,7 @@ function run_tests() fi set +e - clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \ + clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \ --skip 00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" \ "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/docker/test/stateful/s3downloader b/docker/test/stateful/s3downloader index eb3b3cd9faf..b1302877d6a 100755 --- a/docker/test/stateful/s3downloader +++ b/docker/test/stateful/s3downloader @@ -10,7 +10,7 @@ import requests import tempfile -DEFAULT_URL = 'https://clickhouse-datasets.s3.yandex.net' +DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com' AVAILABLE_DATASETS = { 'hits': 'hits_v1.tar', diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 5fd78502337..f8b73791388 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -131,8 +131,23 @@ clickhouse-client -q "system flush logs" ||: grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz & -clickhouse-client -q "select * from system.query_log format TSVWithNamesAndTypes" | pigz > /test_output/query-log.tsv.gz & -clickhouse-client -q "select * from system.query_thread_log format TSVWithNamesAndTypes" | pigz > /test_output/query-thread-log.tsv.gz & + +# Compress tables. 
+# +# NOTE: +# - that due to tests with s3 storage we cannot use /var/lib/clickhouse/data +# directly +# - even though ci auto-compress some files (but not *.tsv) it does this only +# for files >64MB, we want this files to be compressed explicitly +for table in query_log zookeeper_log trace_log +do + clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz & + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.1.tsv.gz & + clickhouse-client --port 29000 -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.2.tsv.gz & + fi +done +wait ||: # Also export trace log in flamegraph-friendly format. for trace_type in CPU Memory Real @@ -161,14 +176,6 @@ fi tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: -# Replace the engine with Ordinary to avoid extra symlinks stuff in artifacts. -# (so that clickhouse-local --path can read it w/o extra care). -sed -i -e "s/ATTACH DATABASE _ UUID '[^']*'/ATTACH DATABASE system/" -e "s/Atomic/Ordinary/" /var/lib/clickhouse/metadata/system.sql -for table in text_log query_log zookeeper_log trace_log; do - sed -i "s/ATTACH TABLE _ UUID '[^']*'/ATTACH TABLE $table/" /var/lib/clickhouse/metadata/system/${table}.sql - tar -chf /test_output/${table}_dump.tar /var/lib/clickhouse/metadata/system.sql /var/lib/clickhouse/metadata/system/${table}.sql /var/lib/clickhouse/data/system/${table} ||: -done - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server1.log ||: grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server2.log ||: @@ -179,8 +186,6 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] rm /var/log/clickhouse-server/clickhouse-server2.log mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: mv /var/log/clickhouse-server/stderr2.log /test_output/ ||: - tar -chf /test_output/zookeeper_log_dump1.tar /var/lib/clickhouse1/data/system/zookeeper_log ||: - tar -chf /test_output/zookeeper_log_dump2.tar /var/lib/clickhouse2/data/system/zookeeper_log ||: tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||: fi diff --git a/docker/test/stateless/setup_minio.sh b/docker/test/stateless/setup_minio.sh index df27b21b05b..aa2aedefad8 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/docker/test/stateless/setup_minio.sh @@ -41,6 +41,7 @@ sleep 5 ./mc admin user add clickminio test testtest ./mc admin policy set clickminio readwrite user=test ./mc mb clickminio/test +./mc policy set public clickminio/test # Upload data to Minio. 
By default after unpacking all tests will in diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 1f39202e743..ba6daffc014 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -25,10 +25,11 @@ RUN apt-get update -y \ brotli COPY ./stress /stress +COPY ./download_previous_release /download_previous_release COPY run.sh / ENV DATASETS="hits visits" -ENV S3_URL="https://clickhouse-datasets.s3.yandex.net" +ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com" ENV EXPORT_S3_STORAGE_POLICIES=1 CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stress/download_previous_release b/docker/test/stress/download_previous_release new file mode 100755 index 00000000000..ea3d376ad90 --- /dev/null +++ b/docker/test/stress/download_previous_release @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 + +import requests +import re +import os + +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry + +CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" + +CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb" +CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static-dbg_{version}_amd64.deb" +CLICKHOUSE_SERVER_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-server_{version}_all.deb" +CLICKHOUSE_CLIENT_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-client_{version}_amd64.deb" + + +CLICKHOUSE_COMMON_STATIC_PACKET_NAME = "clickhouse-common-static_{version}_amd64.deb" +CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = "clickhouse-common-static-dbg_{version}_amd64.deb" +CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_all.deb" +CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_all.deb" + +PACKETS_DIR = "previous_release_package_folder/" +VERSION_PATTERN = r"((?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" + + +class Version: + def __init__(self, version): + self.version = version + + def __lt__(self, other): + return list(map(int, self.version.split('.'))) < list(map(int, other.version.split('.'))) + + def __str__(self): + return self.version + + +class ReleaseInfo: + def __init__(self, version, release_type): + self.version = version + self.type = release_type + + +def find_previous_release(server_version, releases): + releases.sort(key=lambda x: x.version, reverse=True) + for release in releases: + if release.version < server_version: + return True, release + + return False, None + + +def get_previous_release(server_version): + page = 1 + found = False + while not found: + response = requests.get(CLICKHOUSE_TAGS_URL, {'page': page, 'per_page': 100}) + if not response.ok: + raise Exception('Cannot load the list of tags from github: ' + response.reason) + + releases_str = set(re.findall(VERSION_PATTERN, response.text)) + if len(releases_str) == 0: + raise Exception('Cannot find previous release for ' + str(server_version) + ' server version') + + releases = list(map(lambda x: ReleaseInfo(Version(x.split('-')[0]), x.split('-')[1]), releases_str)) + found, previous_release = find_previous_release(server_version, releases) + page += 1 + + return previous_release + + +def download_packet(url, local_file_name, retries=10, backoff_factor=0.3): + session = requests.Session() + 
retry = Retry( + total=retries, + read=retries, + connect=retries, + backoff_factor=backoff_factor, + ) + adapter = HTTPAdapter(max_retries=retry) + session.mount('http://', adapter) + session.mount('https://', adapter) + response = session.get(url) + print(url) + if response.ok: + open(PACKETS_DIR + local_file_name, 'wb').write(response.content) + + +def download_packets(release): + if not os.path.exists(PACKETS_DIR): + os.makedirs(PACKETS_DIR) + + download_packet(CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL.format(version=release.version, type=release.type), + CLICKHOUSE_COMMON_STATIC_PACKET_NAME.format(version=release.version)) + + download_packet(CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL.format(version=release.version, type=release.type), + CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME.format(version=release.version)) + + download_packet(CLICKHOUSE_SERVER_DOWNLOAD_URL.format(version=release.version, type=release.type), + CLICKHOUSE_SERVER_PACKET_NAME.format(version=release.version)) + + download_packet(CLICKHOUSE_CLIENT_DOWNLOAD_URL.format(version=release.version, type=release.type), + CLICKHOUSE_CLIENT_PACKET_NAME.format(version=release.version)) + + +if __name__ == '__main__': + server_version = Version(input()) + previous_release = get_previous_release(server_version) + download_packets(previous_release) + diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 65c5fb9e40f..3cef5b008db 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -22,15 +22,19 @@ export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 -dpkg -i package_folder/clickhouse-common-static_*.deb -dpkg -i package_folder/clickhouse-common-static-dbg_*.deb -dpkg -i package_folder/clickhouse-server_*.deb -dpkg -i package_folder/clickhouse-client_*.deb +function install_packages() +{ + dpkg -i $1/clickhouse-common-static_*.deb + dpkg -i $1/clickhouse-common-static-dbg_*.deb + dpkg -i $1/clickhouse-server_*.deb + dpkg -i $1/clickhouse-client_*.deb +} function configure() { @@ -116,7 +120,7 @@ function start() counter=0 until clickhouse-client --query "SELECT 1" do - if [ "$counter" -gt 240 ] + if [ "$counter" -gt ${1:-240} ] then echo "Cannot start clickhouse-server" cat /var/log/clickhouse-server/stdout.log @@ -171,6 +175,8 @@ quit time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||: } +install_packages package_folder + configure ./setup_minio.sh @@ -202,14 +208,12 @@ stop start clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \ - || echo -e 'Server failed to start\tFAIL' >> /test_output/test_results.tsv + || (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + && grep -Fa ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt) [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL" -# Print Fatal log messages to stdout -zgrep -Fa " " 
/var/log/clickhouse-server/clickhouse-server.log* - # Grep logs for sanitizer asserts, crashes and other critical errors # Sanitizer asserts @@ -226,26 +230,155 @@ zgrep -Fa " Application: Child process was terminated by signal 9" /var/ || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv # Logical errors -zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ - && echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ +zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /test_output/logical_errors.txt \ + && echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv +# Remove file logical_errors.txt if it's empty +[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt + # Crash zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv # It also checks for crash without stacktrace (printed by watchdog) -zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ - && echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ +zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* > /test_output/fatal_messages.txt \ + && echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv +# Remove file fatal_messages.txt if it's empty +[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt + zgrep -Fa "########################################" /test_output/* > /dev/null \ && echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \ && echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv +echo -e "Backward compatibility check\n" + +echo "Download previous release server" +mkdir previous_release_package_folder +clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \ + || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv + +if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] +then + echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/test_results.tsv + stop + + # Uninstall current packages + dpkg --remove clickhouse-client + dpkg --remove clickhouse-server + dpkg --remove clickhouse-common-static-dbg + dpkg --remove clickhouse-common-static + + rm -rf /var/lib/clickhouse/* + + # Install previous release packages + install_packages previous_release_package_folder + + # Start server from previous release + configure + start + + clickhouse-client --query="SELECT 'Server version: ', version()" + + # Install new package before running stress test because we should use new clickhouse-client and new clickhouse-test + install_packages package_folder + + mkdir tmp_stress_output + + 
./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \ + && echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv + rm -rf tmp_stress_output + + clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" + + stop + + # Start new server + configure + start 500 + clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \ + || (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \ + && grep -Fa ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt) + + clickhouse-client --query="SELECT 'Server version: ', version()" + + # Let the server run for a while before checking log. + sleep 60 + + stop + + # Error messages (we should ignore some errors) + echo "Check for Error messages in server log:" + zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ + -e "Code: 236. DB::Exception: Cancelled mutating parts" \ + -e "REPLICA_IS_ALREADY_ACTIVE" \ + -e "REPLICA_IS_ALREADY_EXIST" \ + -e "ALL_REPLICAS_LOST" \ + -e "DDLWorker: Cannot parse DDL task query" \ + -e "RaftInstance: failed to accept a rpc connection due to error 125" \ + -e "UNKNOWN_DATABASE" \ + -e "NETWORK_ERROR" \ + -e "UNKNOWN_TABLE" \ + -e "ZooKeeperClient" \ + -e "KEEPER_EXCEPTION" \ + -e "DirectoryMonitor" \ + -e "TABLE_IS_READ_ONLY" \ + -e "Code: 1000, e.code() = 111, Connection refused" \ + -e "UNFINISHED" \ + -e "Renaming unexpected part" \ + /var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ + && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_error_messages.txt if it's empty + [ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt + + # Sanitizer asserts + zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp + zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp + zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \ + && echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv + rm -f /test_output/tmp + + # OOM + zgrep -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ + && echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Logical errors + echo "Check for Logical errors in server log:" + zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_logical_errors.txt \ + && echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log 
or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_logical_errors.txt if it's empty + [ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt + + # Crash + zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ + && echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv + + # It also checks for crash without stacktrace (printed by watchdog) + echo "Check for Fatal message in server log:" + zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_fatal_messages.txt \ + && echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_fatal_messages.txt if it's empty + [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt + +else + echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv +fi + # Put logs into /test_output/ for log_file in /var/log/clickhouse-server/clickhouse-server.log* do diff --git a/docker/test/stress/stress b/docker/test/stress/stress index c89c5ff5e27..86f8edf5980 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -47,7 +47,8 @@ def get_options(i): return ' '.join(options) -def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_time_limit): +def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_time_limit, backward_compatibility_check): + backward_compatibility_check_option = '--backward-compatibility-check' if backward_compatibility_check else '' global_time_limit_option = '' if global_time_limit: global_time_limit_option = "--global_time_limit={}".format(global_time_limit) @@ -56,7 +57,7 @@ def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_t pipes = [] for i in range(0, len(output_paths)): f = open(output_paths[i], 'w') - full_command = "{} {} {} {}".format(cmd, get_options(i), global_time_limit_option, skip_tests_option) + full_command = "{} {} {} {} {}".format(cmd, get_options(i), global_time_limit_option, skip_tests_option, backward_compatibility_check_option) logging.info("Run func tests '%s'", full_command) p = Popen(full_command, shell=True, stdout=f, stderr=f) pipes.append(p) @@ -168,6 +169,7 @@ if __name__ == "__main__": parser.add_argument("--output-folder") parser.add_argument("--global-time-limit", type=int, default=1800) parser.add_argument("--num-parallel", type=int, default=cpu_count()) + parser.add_argument('--backward-compatibility-check', action='store_true') parser.add_argument('--hung-check', action='store_true', default=False) # make sense only for hung check parser.add_argument('--drop-databases', action='store_true', default=False) @@ -176,7 +178,7 @@ if __name__ == "__main__": if args.drop_databases and not args.hung_check: raise Exception("--drop-databases only used in hung check (--hung-check)") func_pipes = [] - func_pipes = run_func_test(args.test_cmd, 
args.output_folder, args.num_parallel, args.skip_func_tests, args.global_time_limit) + func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests, args.global_time_limit, args.backward_compatibility_check) logging.info("Will wait functests to finish") while True: diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 85c751edfbe..3101ab84c40 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -16,7 +16,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ python3-pip \ shellcheck \ yamllint \ - && pip3 install codespell PyGithub boto3 unidiff dohq-artifactory + && pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH diff --git a/docker/test/style/process_style_check_result.py b/docker/test/style/process_style_check_result.py index 655b7d70243..6472ff21f5e 100755 --- a/docker/test/style/process_style_check_result.py +++ b/docker/test/style/process_style_check_result.py @@ -14,6 +14,7 @@ def process_result(result_folder): ("header duplicates", "duplicate_output.txt"), ("shellcheck", "shellcheck_output.txt"), ("style", "style_output.txt"), + ("black", "black_output.txt"), ("typos", "typos_output.txt"), ("whitespaces", "whitespaces_output.txt"), ("workflows", "workflows_output.txt"), diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index ce3ea4e50a6..651883511e8 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -7,11 +7,13 @@ echo "Check duplicates" | ts ./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt echo "Check style" | ts ./check-style -n |& tee /test_output/style_output.txt +echo "Check python formatting with black" | ts +./check-black -n |& tee /test_output/black_output.txt echo "Check typos" | ts ./check-typos |& tee /test_output/typos_output.txt echo "Check whitespaces" | ts ./check-whitespaces -n |& tee /test_output/whitespaces_output.txt -echo "Check sorkflows" | ts +echo "Check workflows" | ts ./check-workflows |& tee /test_output/workflows_output.txt echo "Check shell scripts with shellcheck" | ts ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt diff --git a/docker/test/test_runner.sh b/docker/test/test_runner.sh deleted file mode 100755 index 0c99c8c2b32..00000000000 --- a/docker/test/test_runner.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/sh - -set -e -x - -# Not sure why shellcheck complains that rc is not assigned before it is referenced. -# shellcheck disable=SC2154 -trap 'rc=$?; echo EXITED WITH: $rc; exit $rc' EXIT - -# CLI option to prevent rebuilding images, just re-run tests with images leftover from previuos time -readonly NO_REBUILD_FLAG="--no-rebuild" - -readonly CLICKHOUSE_DOCKER_DIR="$(realpath "${1}")" -readonly CLICKHOUSE_PACKAGES_ARG="${2}" -CLICKHOUSE_SERVER_IMAGE="${3}" - -if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then - readonly CLICKHOUSE_PACKAGES_DIR="$(realpath "${2}")" # or --no-rebuild -fi - - -# In order to allow packages directory to be anywhere, and to reduce amount of context sent to the docker daemon, -# all images are built in multiple stages: -# 1. build base image, install dependencies -# 2. run image with volume mounted, install what needed from those volumes -# 3. tag container as image -# 4. [optional] build another image atop of tagged. 
- -# TODO: optionally mount most recent clickhouse-test and queries directory from local machine - -if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then - docker build --network=host \ - -f "${CLICKHOUSE_DOCKER_DIR}/test/stateless/clickhouse-statelest-test-runner.Dockerfile" \ - --target clickhouse-test-runner-base \ - -t clickhouse-test-runner-base:preinstall \ - "${CLICKHOUSE_DOCKER_DIR}/test/stateless" - - docker rm -f clickhouse-test-runner-installing-packages || true - docker run --network=host \ - -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \ - --name clickhouse-test-runner-installing-packages \ - clickhouse-test-runner-base:preinstall - docker commit clickhouse-test-runner-installing-packages clickhouse-statelest-test-runner:local - docker rm -f clickhouse-test-runner-installing-packages || true -fi - -# # Create a bind-volume to the clickhouse-test script file -# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/tests/clickhouse-test --opt o=bind clickhouse-test-script-volume -# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/tests/queries --opt o=bind clickhouse-test-queries-dir-volume - -# Build server image (optional) from local packages -if [ -z "${CLICKHOUSE_SERVER_IMAGE}" ]; then - CLICKHOUSE_SERVER_IMAGE="clickhouse/server:local" - - if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then - docker build --network=host \ - -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ - --target clickhouse-server-base \ - -t clickhouse-server-base:preinstall \ - "${CLICKHOUSE_DOCKER_DIR}/server" - - docker rm -f clickhouse_server_base_installing_server || true - docker run --network=host -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \ - --name clickhouse_server_base_installing_server \ - clickhouse-server-base:preinstall - docker commit clickhouse_server_base_installing_server clickhouse-server-base:postinstall - - docker build --network=host \ - -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ - --target clickhouse-server \ - -t "${CLICKHOUSE_SERVER_IMAGE}" \ - "${CLICKHOUSE_DOCKER_DIR}/server" - fi -fi - -docker rm -f test-runner || true -docker-compose down -CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \ - docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \ - create \ - --build --force-recreate - -CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \ - docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \ - run \ - --name test-runner \ - test-runner diff --git a/docker/test/test_runner_docker_compose.yaml b/docker/test/test_runner_docker_compose.yaml deleted file mode 100644 index 2aef6a48d77..00000000000 --- a/docker/test/test_runner_docker_compose.yaml +++ /dev/null @@ -1,34 +0,0 @@ -version: "2" - -services: - clickhouse-server: - image: ${CLICKHOUSE_SERVER_IMAGE} - expose: - - "8123" # HTTP - - "9000" # TCP - - "9009" # HTTP-interserver - restart: "no" - - test-runner: - image: clickhouse-statelest-test-runner:local - - restart: "no" - depends_on: - - clickhouse-server - environment: - # these are used by clickhouse-test to point clickhouse-client to the right server - - CLICKHOUSE_HOST=clickhouse-server - - CLICKHOUSE_PORT=9009 - - CLICKHOUSE_TEST_HOST_EXPOSED_PORT=51234 - expose: - # port for any test to serve data to clickhouse-server on rare occasion (like URL-engine tables in 00646), - # should match value of CLICKHOUSE_TEST_HOST_EXPOSED_PORT above - - "51234" - - # NOTE: 
Dev-mode: mount newest versions of the queries and clickhouse-test script into container. - # volumes: - # - /home/enmk/proj/ClickHouse_master/tests/queries:/usr/share/clickhouse-test/queries:ro - # - /home/enmk/proj/ClickHouse_master/tests/clickhouse-test:/usr/bin/clickhouse-test:ro - - # String-form instead of list-form to allow multiple arguments in "${CLICKHOUSE_TEST_ARGS}" - entrypoint: "clickhouse-test ${CLICKHOUSE_TEST_ARGS}" diff --git a/docker/test/testflows/runner/process_testflows_result.py b/docker/test/testflows/runner/process_testflows_result.py index 37d0b6a69d1..8bfc4ac0b0f 100755 --- a/docker/test/testflows/runner/process_testflows_result.py +++ b/docker/test/testflows/runner/process_testflows_result.py @@ -22,9 +22,9 @@ def process_result(result_folder): total_other = 0 test_results = [] for test in results["tests"]: - test_name = test['test']['test_name'] - test_result = test['result']['result_type'].upper() - test_time = str(test['result']['message_rtime']) + test_name = test["test"]["test_name"] + test_result = test["result"]["result_type"].upper() + test_time = str(test["result"]["message_rtime"]) total_tests += 1 if test_result == "OK": total_ok += 1 @@ -39,24 +39,29 @@ def process_result(result_folder): else: status = "success" - description = "failed: {}, passed: {}, other: {}".format(total_fail, total_ok, total_other) + description = "failed: {}, passed: {}, other: {}".format( + total_fail, total_ok, total_other + ) return status, description, test_results, [json_path, test_binary_log] def write_results(results_file, status_file, results, status): - with open(results_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(results_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(status_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerow(status) + if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of Testflows tests") - parser.add_argument("--in-results-dir", default='./') - parser.add_argument("--out-results-file", default='./test_results.tsv') - parser.add_argument("--out-status-file", default='./check_status.tsv') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + parser = argparse.ArgumentParser( + description="ClickHouse script for parsing results of Testflows tests" + ) + parser.add_argument("--in-results-dir", default="./") + parser.add_argument("--out-results-file", default="./test_results.tsv") + parser.add_argument("--out-status-file", default="./check_status.tsv") args = parser.parse_args() state, description, test_results, logs = process_result(args.in_results_dir) @@ -64,4 +69,3 @@ if __name__ == "__main__": status = (state, description) write_results(args.out_results_file, args.out_status_file, test_results, status) logging.info("Result written") - diff --git a/docker/test/unit/process_unit_tests_result.py b/docker/test/unit/process_unit_tests_result.py index 7219aa13b82..0550edc7c25 100755 --- a/docker/test/unit/process_unit_tests_result.py +++ b/docker/test/unit/process_unit_tests_result.py @@ -5,24 +5,26 @@ import logging import argparse import csv -OK_SIGN = 'OK ]' -FAILED_SIGN = 'FAILED ]' -SEGFAULT = 'Segmentation fault' -SIGNAL = 'received signal SIG' -PASSED = 'PASSED' +OK_SIGN = "OK ]" +FAILED_SIGN = "FAILED ]" +SEGFAULT = "Segmentation 
fault" +SIGNAL = "received signal SIG" +PASSED = "PASSED" + def get_test_name(line): - elements = reversed(line.split(' ')) + elements = reversed(line.split(" ")) for element in elements: - if '(' not in element and ')' not in element: + if "(" not in element and ")" not in element: return element raise Exception("No test name in line '{}'".format(line)) + def process_result(result_folder): summary = [] total_counter = 0 failed_counter = 0 - result_log_path = '{}/test_result.txt'.format(result_folder) + result_log_path = "{}/test_result.txt".format(result_folder) if not os.path.exists(result_log_path): logging.info("No output log on path %s", result_log_path) return "exception", "No output log", [] @@ -30,7 +32,7 @@ def process_result(result_folder): status = "success" description = "" passed = False - with open(result_log_path, 'r') as test_result: + with open(result_log_path, "r") as test_result: for line in test_result: if OK_SIGN in line: logging.info("Found ok line: '%s'", line) @@ -38,7 +40,7 @@ def process_result(result_folder): logging.info("Test name: '%s'", test_name) summary.append((test_name, "OK")) total_counter += 1 - elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line: + elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line: logging.info("Found fail line: '%s'", line) test_name = get_test_name(line.strip()) logging.info("Test name: '%s'", test_name) @@ -67,25 +69,30 @@ def process_result(result_folder): status = "failure" if not description: - description += "fail: {}, passed: {}".format(failed_counter, total_counter - failed_counter) + description += "fail: {}, passed: {}".format( + failed_counter, total_counter - failed_counter + ) return status, description, summary def write_results(results_file, status_file, results, status): - with open(results_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(results_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(status_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerow(status) + if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of unit tests") - parser.add_argument("--in-results-dir", default='/test_output/') - parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') - parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + parser = argparse.ArgumentParser( + description="ClickHouse script for parsing results of unit tests" + ) + parser.add_argument("--in-results-dir", default="/test_output/") + parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") + parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") args = parser.parse_args() state, description, test_results = process_result(args.in_results_dir) @@ -93,4 +100,3 @@ if __name__ == "__main__": status = (state, description) write_results(args.out_results_file, args.out_status_file, test_results, status) logging.info("Result written") - diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index 82df170686d..dadda55c830 100755 --- a/docker/test/util/process_functional_tests_result.py +++ 
b/docker/test/util/process_functional_tests_result.py @@ -16,6 +16,7 @@ NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"] RETRIES_SIGN = "Some tests were restarted" + def process_test_log(log_path): total = 0 skipped = 0 @@ -26,7 +27,7 @@ def process_test_log(log_path): retries = False task_timeout = True test_results = [] - with open(log_path, 'r') as test_file: + with open(log_path, "r") as test_file: for line in test_file: original_line = line line = line.strip() @@ -36,12 +37,15 @@ def process_test_log(log_path): hung = True if RETRIES_SIGN in line: retries = True - if any(sign in line for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)): - test_name = line.split(' ')[2].split(':')[0] + if any( + sign in line + for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN) + ): + test_name = line.split(" ")[2].split(":")[0] - test_time = '' + test_time = "" try: - time_token = line.split(']')[1].strip().split()[0] + time_token = line.split("]")[1].strip().split()[0] float(time_token) test_time = time_token except: @@ -66,9 +70,22 @@ def process_test_log(log_path): elif len(test_results) > 0 and test_results[-1][1] == "FAIL": test_results[-1][3].append(original_line) - test_results = [(test[0], test[1], test[2], ''.join(test[3])) for test in test_results] + test_results = [ + (test[0], test[1], test[2], "".join(test[3])) for test in test_results + ] + + return ( + total, + skipped, + unknown, + failed, + success, + hung, + task_timeout, + retries, + test_results, + ) - return total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results def process_result(result_path): test_results = [] @@ -76,16 +93,26 @@ def process_result(result_path): description = "" files = os.listdir(result_path) if files: - logging.info("Find files in result folder %s", ','.join(files)) - result_path = os.path.join(result_path, 'test_result.txt') + logging.info("Find files in result folder %s", ",".join(files)) + result_path = os.path.join(result_path, "test_result.txt") else: result_path = None description = "No output log" state = "error" if result_path and os.path.exists(result_path): - total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results = process_test_log(result_path) - is_flacky_check = 1 < int(os.environ.get('NUM_TRIES', 1)) + ( + total, + skipped, + unknown, + failed, + success, + hung, + task_timeout, + retries, + test_results, + ) = process_test_log(result_path) + is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1)) logging.info("Is flacky check: %s", is_flacky_check) # If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately) # But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped. 
@@ -120,20 +147,22 @@ def process_result(result_path): def write_results(results_file, status_file, results, status): - with open(results_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(results_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, 'w') as f: - out = csv.writer(f, delimiter='\t') + with open(status_file, "w") as f: + out = csv.writer(f, delimiter="\t") out.writerow(status) if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') - parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of functional tests") - parser.add_argument("--in-results-dir", default='/test_output/') - parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') - parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + parser = argparse.ArgumentParser( + description="ClickHouse script for parsing results of functional tests" + ) + parser.add_argument("--in-results-dir", default="/test_output/") + parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") + parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") args = parser.parse_args() state, description, test_results = process_result(args.in_results_dir) diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index 81887eb8b8e..f9dfebff3f9 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -71,6 +71,8 @@ This check means that the CI system started to process the pull request. When it Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally). If it fails, fix the style errors following the [code style guide](style.md). +Python code is checked with [black](https://github.com/psf/black/). + ### Report Details - [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html) - `output.txt` contains the check resulting errors (invalid tabulation etc), blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt). diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 9d1836b0ff2..db78637f104 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -229,6 +229,25 @@ As simple code editors, you can use Sublime Text or Visual Studio Code, or Kate Just in case, it is worth mentioning that CLion creates `build` path on its own, it also on its own selects `debug` for build type, for configuration it uses a version of CMake that is defined in CLion and not the one installed by you, and finally, CLion will use `make` to run build tasks instead of `ninja`. This is normal behaviour, just keep that in mind to avoid confusion. +## Debugging + +Many graphical IDEs come with an integrated debugger, but you can also use a standalone debugger.
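+ +### GDB + +A minimal session sketch; the binary path and the example breakpoint are illustrative assumptions, not a prescribed workflow: + + # start clickhouse-client under GDB; everything after --args is passed to the program + gdb --args ./clickhouse-client --query="SELECT * FROM TAB" + # <set breakpoints here>, for example: + # (gdb) break DB::Exception::Exception + # (gdb) run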
+ +### LLDB + + # tell LLDB where to find the source code + settings set target.source-map /path/to/build/dir /path/to/source/dir + + # configure LLDB to display code before/after currently executing line + settings set stop-line-count-before 10 + settings set stop-line-count-after 10 + + target create ./clickhouse-client + # <set breakpoints here> + process launch -- --query="SELECT * FROM TAB" + ## Writing Code {#writing-code} The description of ClickHouse architecture can be found here: https://clickhouse.com/docs/en/development/architecture/ diff --git a/docs/en/engines/table-engines/integrations/hive.md b/docs/en/engines/table-engines/integrations/hive.md index b804b9c2279..61147467690 100644 --- a/docs/en/engines/table-engines/integrations/hive.md +++ b/docs/en/engines/table-engines/integrations/hive.md @@ -137,7 +137,7 @@ CREATE TABLE test.test_orc `f_array_array_float` Array(Array(Float32)), `day` String ) -ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc') +ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc') PARTITION BY day ``` diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index a0acda5d5c6..b70cd225cdd 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -688,7 +688,7 @@ Tags: - `volume_name_N` — Volume name. Volume names must be unique. - `disk` — a disk within a volume. - `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the a size of a merged part estimated to be bigger than `max_data_part_size_bytes` then this part will be written to a next volume. Basically this feature allows to keep new/small parts on a hot (SSD) volume and move them to a cold (HDD) volume when they reach large size. Do not use this setting if your policy has only one volume. -- `move_factor` — when the amount of available space gets lower than this factor, data automatically start to move on the next volume if any (by default, 0.1). +- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move to the next volume, if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved. - `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. Cofiguration examples: diff --git a/docs/en/getting-started/playground.md b/docs/en/getting-started/playground.md index 6c44f250242..01d7dd5b69f 100644 --- a/docs/en/getting-started/playground.md +++ b/docs/en/getting-started/playground.md @@ -5,30 +5,19 @@ toc_title: Playground # ClickHouse Playground {#clickhouse-playground} -!!! warning "Warning" - This service is deprecated and will be replaced in foreseeable future. - -[ClickHouse Playground](https://play.clickhouse.com) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. -Several example datasets are available in Playground as well as sample queries that show ClickHouse features. There’s also a selection of ClickHouse LTS releases to experiment with.
+[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. +Several example datasets are available in Playground. You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). ## Credentials {#credentials} -| Parameter | Value | -|:--------------------|:----------------------------------------| -| HTTPS endpoint | `https://play-api.clickhouse.com:8443` | -| Native TCP endpoint | `play-api.clickhouse.com:9440` | -| User | `playground` | -| Password | `clickhouse` | - -There are additional endpoints with specific ClickHouse releases to experiment with their differences (ports and user/password are the same as above): - -- 20.3 LTS: `play-api-v20-3.clickhouse.com` -- 19.14 LTS: `play-api-v19-14.clickhouse.com` - -!!! note "Note" - All these endpoints require a secure TLS connection. +| Parameter | Value | +|:--------------------|:-----------------------------------| +| HTTPS endpoint | `https://play.clickhouse.com:443/` | +| Native TCP endpoint | `play.clickhouse.com:9440` | +| User | `explorer` or `play` | +| Password | (empty) | ## Limitations {#limitations} @@ -37,23 +26,18 @@ The queries are executed as a read-only user. It implies some limitations: - DDL queries are not allowed - INSERT queries are not allowed -The following settings are also enforced: - -- [max_result_bytes=10485760](../operations/settings/query-complexity/#max-result-bytes) -- [max_result_rows=2000](../operations/settings/query-complexity/#setting-max_result_rows) -- [result_overflow_mode=break](../operations/settings/query-complexity/#result-overflow-mode) -- [max_execution_time=60000](../operations/settings/query-complexity/#max-execution-time) +The service also has quotas on its usage. ## Examples {#examples} HTTPS endpoint example with `curl`: ``` bash -curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets" +curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'" ``` TCP endpoint example with [CLI](../interfaces/cli.md): ``` bash -clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" +clickhouse client --secure --host play.clickhouse.com --user explorer ``` diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 8a0562383b6..a7066fca087 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -402,7 +402,7 @@ Parsing allows the presence of the additional field `tskv` without the equal sig Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). -When formatting, rows are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter).
Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). +When formatting, strings are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). ``` bash $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv @@ -410,7 +410,7 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR \*By default, the delimiter is `,`. See the [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter) setting for more information. -When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported. +When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Strings can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing strings without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported. If setting [input_format_csv_empty_as_default](../operations/settings/settings.md#settings-input_format_csv_empty_as_default) is enabled, empty unquoted input values are replaced with default values. For complex default expressions [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#settings-input_format_defaults_for_omitted_fields) must be enabled too. 
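+ +As a quick illustration of the quoting rules above (a sketch; the values are made up and any constants work): + +``` sql +SELECT 'Hello, "World"' AS s, 42 AS n FORMAT CSV +``` + +``` text +"Hello, ""World""",42 +```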
@@ -1396,7 +1396,8 @@ The table below shows supported data types and how they match ClickHouse [data t | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | — | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | @@ -1453,7 +1454,8 @@ The table below shows supported data types and how they match ClickHouse [data t | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT32` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | @@ -1515,7 +1517,8 @@ The table below shows supported data types and how they match ClickHouse [data t | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | | `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 9c7fab7424d..ad199ce452e 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -195,5 +195,6 @@ toc_title: Adopters | ООО «МПЗ Богородский» | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) | | ДомКлик | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) | | АС "Стрела" | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) | +| Piwik PRO | Web Analytics | — | — | — | [Official website, Dec 2018](https://piwik.pro/blog/piwik-pro-clickhouse-faster-efficient-reports/) | [Original article](https://clickhouse.com/docs/en/introduction/adopters/) diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md index 279204a8af1..9aa6419d89c 100644 --- a/docs/en/operations/caches.md +++ b/docs/en/operations/caches.md @@ -5,7 +5,7 @@ toc_title: 
Caches # Cache Types {#cache-types} -When performing queries, ClichHouse uses different caches. +When performing queries, ClickHouse uses different caches. Main cache types: diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 04b29b7cb30..a8ca2079070 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -55,7 +55,7 @@ Internal coordination settings are located in `<keeper_server>.<coordination_settings>` section and contain servers description. @@ -121,7 +121,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively. -The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro". +The 4lw commands have an allow list configuration `four_letter_word_allow_list`, which has the default value "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro". You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port. @@ -201,7 +201,7 @@ Server stats reset. ``` server_id=1 tcp_port=2181 -four_letter_word_white_list=* +four_letter_word_allow_list=* log_storage_path=./coordination/logs snapshot_storage_path=./coordination/snapshots max_requests_batch_size=100 diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index dce7938f98b..ab972c72345 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -36,6 +36,7 @@ Example of configuration: <access_key_id>AKIAIOSFODNN7EXAMPLE</access_key_id> <secret_access_key>wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY</secret_access_key> <format>CSV</format> + <url>https://s3.us-east-1.amazonaws.com/yourbucket/mydata/</url> @@ -44,12 +45,12 @@ ### Example of using named connections with the s3 function ```sql -INSERT INTO FUNCTION s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz', +INSERT INTO FUNCTION s3(s3_mydata, filename = 'test_file.tsv.gz', format = 'TSV', structure = 'number UInt64', compression_method = 'gzip') SELECT * FROM numbers(10000); SELECT count() -FROM s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz') +FROM s3(s3_mydata, filename = 'test_file.tsv.gz') ┌─count()─┐ │ 10000 │ diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 266abadb087..88c43c9c3c2 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1616,3 +1616,14 @@ Possible values: Default value: `10000`. +## global_memory_usage_overcommit_max_wait_microseconds {#global_memory_usage_overcommit_max_wait_microseconds} + +Sets the maximum waiting time for the global overcommit tracker. + +Possible values: + +- Positive integer. + +Default value: `0`.
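+ +A minimal sketch of how this parameter could be set in the server configuration file (the value `200` is an arbitrary example): + +```xml +<clickhouse> + <global_memory_usage_overcommit_max_wait_microseconds>200</global_memory_usage_overcommit_max_wait_microseconds> +</clickhouse> +```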
+ + diff --git a/docs/en/operations/settings/memory-overcommit.md b/docs/en/operations/settings/memory-overcommit.md new file mode 100644 index 00000000000..3f99382b826 --- /dev/null +++ b/docs/en/operations/settings/memory-overcommit.md @@ -0,0 +1,31 @@ +# Memory overcommit + +Memory overcommit is an experimental technique intended to allow setting more flexible memory limits for queries. + +The idea of this technique is to introduce settings which represent the guaranteed amount of memory a query can use. +When memory overcommit is enabled and the memory limit is reached, ClickHouse will select the most overcommitted query and try to free memory by killing this query. + +When the memory limit is reached, any query will wait for some time while attempting to allocate new memory. +If memory is freed within the timeout, the query continues execution. Otherwise, an exception is thrown and the query is killed. + +The query to stop or kill is selected by either the global or the user overcommit tracker, depending on which memory limit is reached. + +## User overcommit tracker + +The user overcommit tracker finds the query with the biggest overcommit ratio in the user's query list. +The overcommit ratio is computed as the number of allocated bytes divided by the value of the `max_guaranteed_memory_usage` setting. + +The waiting timeout is set by the `memory_usage_overcommit_max_wait_microseconds` setting. + +**Example** + +```sql +SELECT number FROM numbers(1000) GROUP BY number SETTINGS max_guaranteed_memory_usage=4000, memory_usage_overcommit_max_wait_microseconds=500 +``` + +## Global overcommit tracker + +The global overcommit tracker finds the query with the biggest overcommit ratio in the list of all queries. +In this case the overcommit ratio is computed as the number of allocated bytes divided by the value of the `max_guaranteed_memory_usage_for_user` setting. + +The waiting timeout is set by the `global_memory_usage_overcommit_max_wait_microseconds` parameter in the configuration file. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 93075284cfc..91bf0812de4 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4220,10 +4220,36 @@ Possible values: - 0 — Disabled. - 1 — Enabled. The wait time equal shutdown_wait_unfinished config. -Default value: 0. +Default value: `0`. ## shutdown_wait_unfinished The waiting time in seconds for currently handled connections when shutdown server. -Default Value: 5. +Default Value: `5`. + +## max_guaranteed_memory_usage + +Maximum guaranteed memory usage for processing of a single query. +It represents a soft limit in case the hard limit is reached on the user level. +Zero means unlimited. +Read more about [memory overcommit](memory-overcommit.md). + +Default value: `0`. + +## memory_usage_overcommit_max_wait_microseconds + +Maximum time a thread will wait for memory to be freed in the case of memory overcommit on the user level. +If the timeout is reached and memory is not freed, an exception is thrown. +Read more about [memory overcommit](memory-overcommit.md). + +Default value: `0`. + +## max_guaranteed_memory_usage_for_user + +Maximum guaranteed memory usage for processing all concurrently running queries for the user. +It represents a soft limit in case the hard limit is reached on the global level. +Zero means unlimited. +Read more about [memory overcommit](memory-overcommit.md). + +Default value: `0`.
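+ +A sketch mirroring the user-level example in the memory overcommit documentation, but exercising the per-user guarantee (the values are illustrative only): + +```sql +SELECT number FROM numbers(1000) GROUP BY number SETTINGS max_guaranteed_memory_usage_for_user=4000, memory_usage_overcommit_max_wait_microseconds=500 +```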
diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md new file mode 100644 index 00000000000..0237885bcb6 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md @@ -0,0 +1,48 @@ +--- +toc_priority: 108 +--- + +# groupArraySorted {#groupArraySorted} + +Returns an array with the first N items in ascending order. + +``` sql +groupArraySorted(N)(column) +``` + +**Parameters** + +- `N` – The number of elements to return. + +If the parameter is omitted, the default value of 10 is used. + +**Arguments** + +- `column` – The value. +- `expr` — Optional. The field or expression to sort by. If not set, values are sorted by themselves. + +**Example** + +Gets the first 10 numbers: + +``` sql +SELECT groupArraySorted(10)(number) FROM numbers(100) +``` + +``` text +┌─groupArraySorted(10)(number)─┐ +│ [0,1,2,3,4,5,6,7,8,9] │ +└──────────────────────────────┘ +``` + +Or the last 10: + +``` sql +SELECT groupArraySorted(10)(number, -number) FROM numbers(100) +``` + +``` text +┌─groupArraySorted(10)(number, negate(number))─┐ +│ [99,98,97,96,95,94,93,92,91,90] │ +└──────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index 59befed8785..2a8a2843510 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -35,6 +35,7 @@ ClickHouse-specific aggregate functions: - [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md) - [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md) - [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md) +- [groupArraySorted](../../../sql-reference/aggregate-functions/reference/grouparraysorted.md) - [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md) - [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md) - [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 8231cda4b77..eebc489fffa 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1392,12 +1392,24 @@ Returns the first element in the `arr1` array for which `func` returns something Note that the `arrayFirst` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. +## arrayFirstOrNull(func, arr1, …) {#array-first-or-null} + +Returns the first element in the `arr1` array for which `func` returns something other than 0. If there is no such element, it returns `NULL`. + +Note that the `arrayFirstOrNull` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. + ## arrayLast(func, arr1, …) {#array-last} Returns the last element in the `arr1` array for which `func` returns something other than 0. Note that the `arrayLast` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions).
You must pass a lambda function to it as the first argument, and it can’t be omitted. +## arrayLastOrNull(func, arr1, …) {#array-last-or-null} + +Returns the last element in the `arr1` array for which `func` returns something other than 0. If there is no such element, it returns `NULL`. + +Note that the `arrayLastOrNull` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. + ## arrayFirstIndex(func, arr1, …) {#array-first-index} Returns the index of the first element in the `arr1` array for which `func` returns something other than 0. diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 409ec422ade..ee663c92695 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -114,9 +114,9 @@ In addition, this column is not substituted when using an asterisk in a SELECT q ### EPHEMERAL {#ephemeral} -`EPHEMERAL expr` +`EPHEMERAL [expr]` -Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. +Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of the CREATE statement. If `expr` is omitted, the column type must be specified. INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns. ### ALIAS {#alias} diff --git a/docs/ja/development/developer-instruction.md b/docs/ja/development/developer-instruction.md index c95dc0e2ea4..48afc77237c 100644 --- a/docs/ja/development/developer-instruction.md +++ b/docs/ja/development/developer-instruction.md @@ -273,7 +273,7 @@ GitHubのUIでforkリポジトリに移動します。 ブランチで開発し プル要求は、作業がまだ完了していない場合でも作成できます。 この場合、単語を入れてください “WIP” (進行中の作業)タイトルの先頭に、それは後で変更することができます。 これは、変更の協調的なレビューと議論、および利用可能なすべてのテストの実行に役立ちます。 変更の簡単な説明を提供することが重要です。 -Yandexの従業員がタグであなたのPRにラベルを付けるとすぐにテストが開始されます “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour. +ClickHouseの従業員がタグであなたのPRにラベルを付けるとすぐにテストが開始されます “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour. システムは、プル要求用にClickHouseバイナリビルドを個別に準備します。 これらのビルドを取得するには “Details” 次のリンク “ClickHouse build check” 小切手のリストのエントリ。 そこには、ビルドへの直接リンクがあります。ClickHouseのdebパッケージは、本番サーバーにも展開できます(恐れがない場合)。 diff --git a/docs/ja/getting-started/playground.md b/docs/ja/getting-started/playground.md index 4e35096aa4b..01d7dd5b69f 100644 --- a/docs/ja/getting-started/playground.md +++ b/docs/ja/getting-started/playground.md @@ -5,58 +5,39 @@ toc_title: Playground # ClickHouse Playground {#clickhouse-playground} -!!! warning "Warning" - This service is deprecated and will be replaced in foreseeable future. +[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. +Several example datasets are available in Playground.
-[ClickHouse Playground](https://play.clickhouse.com) では、サーバーやクラスタを設定することなく、即座にクエリを実行して ClickHouse を試すことができます。 -いくつかの例のデータセットは、Playground だけでなく、ClickHouse の機能を示すサンプルクエリとして利用可能です. また、 ClickHouse の LTS リリースで試すこともできます。 +You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). -任意の HTTP クライアントを使用してプレイグラウンドへのクエリを作成することができます。例えば[curl](https://curl.haxx.se)、[wget](https://www.gnu.org/software/wget/)、[JDBC](../interfaces/jdbc.md)または[ODBC](../interfaces/odbc.md)ドライバを使用して接続を設定します。 -ClickHouse をサポートするソフトウェア製品の詳細情報は[こちら](../interfaces/index.md)をご覧ください。 +## Credentials {#credentials} -## 資格情報 {#credentials} +| Parameter | Value | +|:--------------------|:-----------------------------------| +| HTTPS endpoint | `https://play.clickhouse.com:443/` | +| Native TCP endpoint | `play.clickhouse.com:9440` | +| User | `explorer` or `play` | +| Password | (empty) | -| パラメータ | 値 | -| :---------------------------- | :-------------------------------------- | -| HTTPS エンドポイント | `https://play-api.clickhouse.com:8443` | -| ネイティブ TCP エンドポイント | `play-api.clickhouse.com:9440` | -| ユーザ名 | `playgrounnd` | -| パスワード | `clickhouse` | +## Limitations {#limitations} +The queries are executed as a read-only user. It implies some limitations: -特定のClickHouseのリリースで試すために、追加のエンドポイントがあります。(ポートとユーザー/パスワードは上記と同じです)。 +- DDL queries are not allowed +- INSERT queries are not allowed -- 20.3 LTS: `play-api-v20-3.clickhouse.com` -- 19.14 LTS: `play-api-v19-14.clickhouse.com` +The service also has quotas on its usage. -!!!
note "備考" -これらのエンドポイントはすべて、安全なTLS接続が必要です。 +## Examples {#examples} - -## 制限事項 {#limitations} - -クエリは読み取り専用のユーザとして実行されます。これにはいくつかの制限があります。 - -- DDL クエリは許可されていません。 -- INSERT クエリは許可されていません。 - -また、以下の設定がなされています。 - -- [max_result_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes) -- [max_result_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows) -- [result_overflow_mode=break](../operations/settings/query_complexity/#result-overflow-mode) -- [max_execution_time=60000](../operations/settings/query_complexity/#max-execution-time) - -## 例 {#examples} - -`curl` を用いて HTTPSエンドポイントへ接続する例: +HTTPS endpoint example with `curl`: ``` bash -curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets" +curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'" ``` -[CLI](../interfaces/cli.md) で TCP エンドポイントへ接続する例: +TCP endpoint example with [CLI](../interfaces/cli.md): ``` bash -clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" +clickhouse client --secure --host play.clickhouse.com --user explorer ``` diff --git a/docs/ja/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/ja/sql-reference/aggregate-functions/reference/grouparraysorted.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 964d39163d8..5b6740e88bb 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -72,11 +72,11 @@ ClickHouse не работает и не собирается на 32-битны Этот вариант не подходит для отправки изменений на сервер. Вы можете временно его использовать, а затем добавить ssh ключи и заменить адрес репозитория с помощью команды `git remote`. -Вы можете также добавить для своего локального репозитория адрес оригинального репозитория Яндекса, чтобы притягивать оттуда обновления: +Вы можете также добавить для своего локального репозитория адрес оригинального репозитория, чтобы притягивать оттуда обновления: git remote add upstream git@github.com:ClickHouse/ClickHouse.git -После этого, вы сможете добавлять в свой репозиторий обновления из репозитория Яндекса с помощью команды `git pull upstream master`. +После этого, вы сможете добавлять в свой репозиторий обновления из репозитория ClickHouse с помощью команды `git pull upstream master`. ### Работа с сабмодулями Git {#rabota-s-sabmoduliami-git} @@ -288,7 +288,7 @@ sudo ./llvm.sh 12 Pull request можно создать, даже если работа над задачей ещё не завершена. В этом случае, добавьте в его название слово «WIP» (work in progress). Название можно будет изменить позже. Это полезно для совместного просмотра и обсуждения изменений, а также для запуска всех имеющихся тестов. Введите краткое описание изменений - впоследствии, оно будет использовано для релизных changelog. -Тесты будут запущены, как только сотрудники Яндекса поставят для pull request тег «Can be tested». Результаты первых проверок (стиль кода) появятся уже через несколько минут. Результаты сборки появятся примерно через пол часа. Результаты основного набора тестов будут доступны в пределах часа. +Тесты будут запущены, как только сотрудники ClickHouse поставят для pull request тег «Can be tested». Результаты первых проверок (стиль кода) появятся уже через несколько минут. 
Результаты сборки появятся примерно через пол часа. Результаты основного набора тестов будут доступны в пределах часа. Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Clickhouse build check». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно). diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 3f140f85396..c75fa8e92ce 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -678,7 +678,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); - `volume_name_N` — название тома. Названия томов должны быть уникальны. - `disk` — диск, находящийся внутри тома. - `max_data_part_size_bytes` — максимальный размер куска данных, который может находится на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том. -- `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). +- `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). Для перемещения куски сортируются по размеру от большего к меньшему (по убыванию) и выбираются куски, совокупный размер которых достаточен для соблюдения условия `move_factor`, если совокупный размер всех партов недостаточен, будут перемещены все парты. - `prefer_not_to_merge` — Отключает слияние кусков данных, хранящихся на данном томе. Если данная настройка включена, то слияние данных, хранящихся на данном томе, не допускается. Это позволяет контролировать работу ClickHouse с медленными дисками. Примеры конфигураций: diff --git a/docs/ru/getting-started/playground.md b/docs/ru/getting-started/playground.md index d9f65e192b5..01d7dd5b69f 100644 --- a/docs/ru/getting-started/playground.md +++ b/docs/ru/getting-started/playground.md @@ -5,53 +5,39 @@ toc_title: Playground # ClickHouse Playground {#clickhouse-playground} -!!! warning "Warning" - This service is deprecated and will be replaced in foreseeable future. +[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. +Several example datasets are available in Playground. -[ClickHouse Playground](https://play.clickhouse.com) позволяет пользователям экспериментировать с ClickHouse, мгновенно выполняя запросы без настройки своего сервера или кластера. -В Playground доступны несколько тестовых массивов данных, а также примеры запросов, которые показывают возможности ClickHouse. Кроме того, вы можете выбрать LTS релиз ClickHouse, который хотите протестировать. 
+You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). +## Credentials {#credentials} -## Параметры доступа {#credentials} +| Parameter | Value | +|:--------------------|:-----------------------------------| +| HTTPS endpoint | `https://play.clickhouse.com:443/` | +| Native TCP endpoint | `play.clickhouse.com:9440` | +| User | `explorer` or `play` | +| Password | (empty) | -| Параметр | Значение | -|:--------------------|:----------------------------------------| -| Конечная точка HTTPS| `https://play-api.clickhouse.com:8443` | -| Конечная точка TCP | `play-api.clickhouse.com:9440` | -| Пользователь | `playground` | -| Пароль | `clickhouse` | +## Limitations {#limitations} -Также можно подключаться к ClickHouse определённых релизов, чтобы протестировать их различия (порты и пользователь / пароль остаются неизменными): +The queries are executed as a read-only user. It implies some limitations: -- 20.3 LTS: `play-api-v20-3.clickhouse.com` -- 19.14 LTS: `play-api-v19-14.clickhouse.com` +- DDL queries are not allowed +- INSERT queries are not allowed -!!! note "Примечание" - Для всех этих конечных точек требуется безопасное соединение TLS. +The service also has quotas on its usage.
-## Limitations {#limitations} +## Examples {#examples} -Queries are executed as a user with `readonly` rights, which implies the following limitations: -DDL queries are forbidden -INSERT queries are forbidden - -The following options are also set: -- [max_result_bytes=10485760](../operations/settings/query-complexity.md#max-result-bytes) -- [max_result_rows=2000](../operations/settings/query-complexity.md#setting-max_result_rows) -- [result_overflow_mode=break](../operations/settings/query-complexity.md#result-overflow-mode) -- [max_execution_time=60000](../operations/settings/query-complexity.md#max-execution-time) - -## Examples {#examples} - -HTTPS endpoint example with `curl`: +HTTPS endpoint example with `curl`: ``` bash -curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets" +curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'" ``` -TCP endpoint example with [CLI](../interfaces/cli.md): +TCP endpoint example with [CLI](../interfaces/cli.md): ``` bash -clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" +clickhouse client --secure --host play.clickhouse.com --user explorer ``` diff --git a/docs/ru/operations/clickhouse-keeper.md b/docs/ru/operations/clickhouse-keeper.md index 2f3f3c0f63c..fe0f7d12893 100644 --- a/docs/ru/operations/clickhouse-keeper.md +++ b/docs/ru/operations/clickhouse-keeper.md @@ -54,7 +54,7 @@ ClickHouse Keeper can be used as an equivalent - `auto_forwarding` — allow forwarding write requests from followers to the leader (default: true). - `shutdown_timeout` — the time to wait for internal connections to finish and for the server to shut down, in milliseconds (default: 5000). - `startup_timeout` — the time after which the server shuts down if it fails to connect to the other quorum members, in milliseconds (default: 30000). -- `four_letter_word_white_list` — the list of allowed 4-letter commands (default: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro"). +- `four_letter_word_allow_list` — the list of allowed 4-letter commands (default: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro"). The quorum configuration is located in `.` and contains a description of the servers. @@ -114,7 +114,7 @@ clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon ClickHouse Keeper also supports 4-letter commands, almost the same as in Zookeeper. Each command consists of 4 characters, for example, `mntr`, `stat`, etc. A few interesting commands: `stat` provides general information about the server and connected clients, while `srvr` and `cons` provide extended details about the server and connections respectively. -The 4-letter commands have a parameter for configuring the allow list, `four_letter_word_white_list`, whose default value is "conf,cons,crst,envi,ruok,srst,srvr,stat, wchc,wchs,dirs,mntr,isro". +The 4-letter commands have a parameter for configuring the allow list, `four_letter_word_allow_list`, whose default value is "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro". You can send the commands to ClickHouse Keeper via telnet or nc, on the client port. @@ -194,7 +194,7 @@ Server stats reset.
``` server_id=1 tcp_port=2181 -four_letter_word_white_list=* +four_letter_word_allow_list=* log_storage_path=./coordination/logs snapshot_storage_path=./coordination/snapshots max_requests_batch_size=100 diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index b9c2a4f0f0b..48cce437b8d 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -110,9 +110,9 @@ SELECT x, toTypeName(x) FROM t1; ### EPHEMERAL {#ephemeral} -`EPHEMERAL expr` +`EPHEMERAL [expr]` -An ephemeral expression. Such a column is not stored in the table and cannot be retrieved by a SELECT query, but it can be referenced in the default expressions of the CREATE query. +An ephemeral expression. Such a column is not stored in the table and cannot be retrieved by a SELECT query, but it can be referenced in the default expressions of the CREATE query. If the default value `expr` is omitted, the column type must be specified explicitly. An INSERT without a column list ignores this column, which preserves the invariant: a dump obtained via `SELECT *` can be inserted back into the table with an INSERT that does not specify the column list. ### ALIAS {#alias} diff --git a/docs/tools/amp.py b/docs/tools/amp.py index 22417407946..584a40c4bba 100644 --- a/docs/tools/amp.py +++ b/docs/tools/amp.py @@ -15,24 +15,24 @@ import website def prepare_amp_html(lang, args, root, site_temp, main_site_dir): src_path = root - src_index = os.path.join(src_path, 'index.html') + src_index = os.path.join(src_path, "index.html") rel_path = os.path.relpath(src_path, site_temp) - dst_path = os.path.join(main_site_dir, rel_path, 'amp') - dst_index = os.path.join(dst_path, 'index.html') + dst_path = os.path.join(main_site_dir, rel_path, "amp") + dst_index = os.path.join(dst_path, "index.html") - logging.debug(f'Generating AMP version for {rel_path} ({lang})') + logging.debug(f"Generating AMP version for {rel_path} ({lang})") os.makedirs(dst_path) - with open(src_index, 'r') as f: + with open(src_index, "r") as f: content = f.read() - css_in = ' '.join(website.get_css_in(args)) + css_in = " ".join(website.get_css_in(args)) command = f"purifycss --min {css_in} '{src_index}'" logging.debug(command) - inline_css = subprocess.check_output(command, shell=True).decode('utf-8') - inline_css = inline_css.replace('!important', '').replace('/*!', '/*') + inline_css = subprocess.check_output(command, shell=True).decode("utf-8") + inline_css = inline_css.replace("!important", "").replace("/*!", "/*") inline_css = cssmin.cssmin(inline_css) - content = content.replace('CUSTOM_CSS_PLACEHOLDER', inline_css) + content = content.replace("CUSTOM_CSS_PLACEHOLDER", inline_css) - with open(dst_index, 'w') as f: + with open(dst_index, "w") as f: f.write(content) return dst_index @@ -40,15 +40,12 @@ def prepare_amp_html(lang, args, root, site_temp, main_site_dir): def build_amp(lang, args, cfg): # AMP docs: https://amp.dev/documentation/ - logging.info(f'Building AMP version for {lang}') + logging.info(f"Building AMP version for {lang}") with util.temp_dir() as site_temp: - extra = cfg.data['extra'] - main_site_dir = cfg.data['site_dir'] - extra['is_amp'] = True - cfg.load_dict({ - 'site_dir': site_temp, - 'extra': extra - }) + extra = cfg.data["extra"] + main_site_dir = cfg.data["site_dir"] + extra["is_amp"] = True + cfg.load_dict({"site_dir": site_temp, "extra": extra}) try: mkdocs.commands.build.build(cfg) @@ -60,50 +57,49 @@ def build_amp(lang, args, cfg): paths = [] for root, _,
filenames in os.walk(site_temp): - if 'index.html' in filenames: - paths.append(prepare_amp_html(lang, args, root, site_temp, main_site_dir)) - logging.info(f'Finished building AMP version for {lang}') + if "index.html" in filenames: + paths.append( + prepare_amp_html(lang, args, root, site_temp, main_site_dir) + ) + logging.info(f"Finished building AMP version for {lang}") def html_to_amp(content): - soup = bs4.BeautifulSoup( - content, - features='html.parser' - ) + soup = bs4.BeautifulSoup(content, features="html.parser") for tag in soup.find_all(): - if tag.attrs.get('id') == 'tostring': - tag.attrs['id'] = '_tostring' - if tag.name == 'img': - tag.name = 'amp-img' - tag.attrs['layout'] = 'responsive' - src = tag.attrs['src'] - if not (src.startswith('/') or src.startswith('http')): - tag.attrs['src'] = f'../{src}' - if not tag.attrs.get('width'): - tag.attrs['width'] = '640' - if not tag.attrs.get('height'): - tag.attrs['height'] = '320' - if tag.name == 'iframe': - tag.name = 'amp-iframe' - tag.attrs['layout'] = 'responsive' - del tag.attrs['alt'] - del tag.attrs['allowfullscreen'] - if not tag.attrs.get('width'): - tag.attrs['width'] = '640' - if not tag.attrs.get('height'): - tag.attrs['height'] = '320' - elif tag.name == 'a': - href = tag.attrs.get('href') + if tag.attrs.get("id") == "tostring": + tag.attrs["id"] = "_tostring" + if tag.name == "img": + tag.name = "amp-img" + tag.attrs["layout"] = "responsive" + src = tag.attrs["src"] + if not (src.startswith("/") or src.startswith("http")): + tag.attrs["src"] = f"../{src}" + if not tag.attrs.get("width"): + tag.attrs["width"] = "640" + if not tag.attrs.get("height"): + tag.attrs["height"] = "320" + if tag.name == "iframe": + tag.name = "amp-iframe" + tag.attrs["layout"] = "responsive" + del tag.attrs["alt"] + del tag.attrs["allowfullscreen"] + if not tag.attrs.get("width"): + tag.attrs["width"] = "640" + if not tag.attrs.get("height"): + tag.attrs["height"] = "320" + elif tag.name == "a": + href = tag.attrs.get("href") if href: - if not (href.startswith('/') or href.startswith('http')): - if '#' in href: - href, anchor = href.split('#') + if not (href.startswith("/") or href.startswith("http")): + if "#" in href: + href, anchor = href.split("#") else: anchor = None - href = f'../{href}amp/' + href = f"../{href}amp/" if anchor: - href = f'{href}#{anchor}' - tag.attrs['href'] = href + href = f"{href}#{anchor}" + tag.attrs["href"] = href content = str(soup) return website.minify_html(content) diff --git a/docs/tools/blog.py b/docs/tools/blog.py index b58523504a3..d1fc540d8bf 100644 --- a/docs/tools/blog.py +++ b/docs/tools/blog.py @@ -17,54 +17,52 @@ import util def build_for_lang(lang, args): - logging.info(f'Building {lang} blog') + logging.info(f"Building {lang} blog") try: theme_cfg = { - 'name': None, - 'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir), - 'language': lang, - 'direction': 'ltr', - 'static_templates': ['404.html'], - 'extra': { - 'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching - } + "name": None, + "custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir), + "language": lang, + "direction": "ltr", + "static_templates": ["404.html"], + "extra": { + "now": int( + time.mktime(datetime.datetime.now().timetuple()) + ) # TODO better way to avoid caching + }, } # the following list of languages is sorted according to # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers - languages = { - 'en': 'English' - } 
+ languages = {"en": "English"} - site_names = { - 'en': 'ClickHouse Blog' - } + site_names = {"en": "ClickHouse Blog"} assert len(site_names) == len(languages) site_dir = os.path.join(args.blog_output_dir, lang) - plugins = ['macros'] + plugins = ["macros"] if args.htmlproofer: - plugins.append('htmlproofer') + plugins.append("htmlproofer") - website_url = 'https://clickhouse.com' - site_name = site_names.get(lang, site_names['en']) + website_url = "https://clickhouse.com" + site_name = site_names.get(lang, site_names["en"]) blog_nav, post_meta = nav.build_blog_nav(lang, args) raw_config = dict( site_name=site_name, - site_url=f'{website_url}/blog/{lang}/', + site_url=f"{website_url}/blog/{lang}/", docs_dir=os.path.join(args.blog_dir, lang), site_dir=site_dir, strict=True, theme=theme_cfg, nav=blog_nav, - copyright='©2016–2022 ClickHouse, Inc.', + copyright="©2016–2022 ClickHouse, Inc.", use_directory_urls=True, - repo_name='ClickHouse/ClickHouse', - repo_url='https://github.com/ClickHouse/ClickHouse/', - edit_uri=f'edit/master/website/blog/{lang}', + repo_name="ClickHouse/ClickHouse", + repo_url="https://github.com/ClickHouse/ClickHouse/", + edit_uri=f"edit/master/website/blog/{lang}", markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, plugins=plugins, extra=dict( @@ -75,12 +73,12 @@ def build_for_lang(lang, args): website_url=website_url, events=args.events, languages=languages, - includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), + includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"), is_amp=False, is_blog=True, post_meta=post_meta, - today=datetime.date.today().isoformat() - ) + today=datetime.date.today().isoformat(), + ), ) cfg = config.load_config(**raw_config) @@ -89,21 +87,28 @@ def build_for_lang(lang, args): redirects.build_blog_redirects(args) env = util.init_jinja2_env(args) - with open(os.path.join(args.website_dir, 'templates', 'blog', 'rss.xml'), 'rb') as f: - rss_template_string = f.read().decode('utf-8').strip() + with open( + os.path.join(args.website_dir, "templates", "blog", "rss.xml"), "rb" + ) as f: + rss_template_string = f.read().decode("utf-8").strip() rss_template = env.from_string(rss_template_string) - with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f: - f.write(rss_template.render({'config': raw_config})) + with open(os.path.join(args.blog_output_dir, lang, "rss.xml"), "w") as f: + f.write(rss_template.render({"config": raw_config})) - logging.info(f'Finished building {lang} blog') + logging.info(f"Finished building {lang} blog") except exceptions.ConfigurationError as e: - raise SystemExit('\n' + str(e)) + raise SystemExit("\n" + str(e)) def build_blog(args): tasks = [] - for lang in args.blog_lang.split(','): + for lang in args.blog_lang.split(","): if lang: - tasks.append((lang, args,)) + tasks.append( + ( + lang, + args, + ) + ) util.run_function_in_parallel(build_for_lang, tasks, threads=False) diff --git a/docs/tools/build.py b/docs/tools/build.py index e4f6718699a..612be0229d3 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -30,76 +30,76 @@ import website from cmake_in_clickhouse_generator import generate_cmake_flags_files + class ClickHouseMarkdown(markdown.extensions.Extension): class ClickHousePreprocessor(markdown.util.Processor): def run(self, lines): for line in lines: - if '' not in line: + if "" not in line: yield line def extendMarkdown(self, md): - md.preprocessors.register(self.ClickHousePreprocessor(), 'clickhouse_preprocessor', 31) + 
md.preprocessors.register( + self.ClickHousePreprocessor(), "clickhouse_preprocessor", 31 + ) markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown def build_for_lang(lang, args): - logging.info(f'Building {lang} docs') - os.environ['SINGLE_PAGE'] = '0' + logging.info(f"Building {lang} docs") + os.environ["SINGLE_PAGE"] = "0" try: theme_cfg = { - 'name': None, - 'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir), - 'language': lang, - 'direction': 'rtl' if lang == 'fa' else 'ltr', - 'static_templates': ['404.html'], - 'extra': { - 'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching - } + "name": None, + "custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir), + "language": lang, + "direction": "rtl" if lang == "fa" else "ltr", + "static_templates": ["404.html"], + "extra": { + "now": int( + time.mktime(datetime.datetime.now().timetuple()) + ) # TODO better way to avoid caching + }, } # the following list of languages is sorted according to # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers - languages = { - 'en': 'English', - 'zh': '中文', - 'ru': 'Русский', - 'ja': '日本語' - } + languages = {"en": "English", "zh": "中文", "ru": "Русский", "ja": "日本語"} site_names = { - 'en': 'ClickHouse %s Documentation', - 'zh': 'ClickHouse文档 %s', - 'ru': 'Документация ClickHouse %s', - 'ja': 'ClickHouseドキュメント %s' + "en": "ClickHouse %s Documentation", + "zh": "ClickHouse文档 %s", + "ru": "Документация ClickHouse %s", + "ja": "ClickHouseドキュメント %s", } assert len(site_names) == len(languages) site_dir = os.path.join(args.docs_output_dir, lang) - plugins = ['macros'] + plugins = ["macros"] if args.htmlproofer: - plugins.append('htmlproofer') + plugins.append("htmlproofer") - website_url = 'https://clickhouse.com' - site_name = site_names.get(lang, site_names['en']) % '' - site_name = site_name.replace(' ', ' ') + website_url = "https://clickhouse.com" + site_name = site_names.get(lang, site_names["en"]) % "" + site_name = site_name.replace(" ", " ") raw_config = dict( site_name=site_name, - site_url=f'{website_url}/docs/{lang}/', + site_url=f"{website_url}/docs/{lang}/", docs_dir=os.path.join(args.docs_dir, lang), site_dir=site_dir, strict=True, theme=theme_cfg, - copyright='©2016–2022 ClickHouse, Inc.', + copyright="©2016–2022 ClickHouse, Inc.", use_directory_urls=True, - repo_name='ClickHouse/ClickHouse', - repo_url='https://github.com/ClickHouse/ClickHouse/', - edit_uri=f'edit/master/docs/{lang}', + repo_name="ClickHouse/ClickHouse", + repo_url="https://github.com/ClickHouse/ClickHouse/", + edit_uri=f"edit/master/docs/{lang}", markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, plugins=plugins, extra=dict( @@ -111,16 +111,16 @@ def build_for_lang(lang, args): website_url=website_url, events=args.events, languages=languages, - includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), + includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"), is_amp=False, - is_blog=False - ) + is_blog=False, + ), ) # Clean to be safe if last build finished abnormally single_page.remove_temporary_files(lang, args) - raw_config['nav'] = nav.build_docs_nav(lang, args) + raw_config["nav"] = nav.build_docs_nav(lang, args) cfg = config.load_config(**raw_config) @@ -131,21 +131,28 @@ def build_for_lang(lang, args): amp.build_amp(lang, args, cfg) if not args.skip_single_page: - single_page.build_single_page_version(lang, args, raw_config.get('nav'), cfg) + 
single_page.build_single_page_version( + lang, args, raw_config.get("nav"), cfg + ) mdx_clickhouse.PatchedMacrosPlugin.disabled = False - logging.info(f'Finished building {lang} docs') + logging.info(f"Finished building {lang} docs") except exceptions.ConfigurationError as e: - raise SystemExit('\n' + str(e)) + raise SystemExit("\n" + str(e)) def build_docs(args): tasks = [] - for lang in args.lang.split(','): + for lang in args.lang.split(","): if lang: - tasks.append((lang, args,)) + tasks.append( + ( + lang, + args, + ) + ) util.run_function_in_parallel(build_for_lang, tasks, threads=False) redirects.build_docs_redirects(args) @@ -171,56 +178,64 @@ def build(args): redirects.build_static_redirects(args) -if __name__ == '__main__': - os.chdir(os.path.join(os.path.dirname(__file__), '..')) +if __name__ == "__main__": + os.chdir(os.path.join(os.path.dirname(__file__), "..")) # A root path to ClickHouse source code. - src_dir = '..' + src_dir = ".." - website_dir = os.path.join(src_dir, 'website') + website_dir = os.path.join(src_dir, "website") arg_parser = argparse.ArgumentParser() - arg_parser.add_argument('--lang', default='en,ru,zh,ja') - arg_parser.add_argument('--blog-lang', default='en') - arg_parser.add_argument('--docs-dir', default='.') - arg_parser.add_argument('--theme-dir', default=website_dir) - arg_parser.add_argument('--website-dir', default=website_dir) - arg_parser.add_argument('--src-dir', default=src_dir) - arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog')) - arg_parser.add_argument('--output-dir', default='build') - arg_parser.add_argument('--nav-limit', type=int, default='0') - arg_parser.add_argument('--skip-multi-page', action='store_true') - arg_parser.add_argument('--skip-single-page', action='store_true') - arg_parser.add_argument('--skip-amp', action='store_true') - arg_parser.add_argument('--skip-website', action='store_true') - arg_parser.add_argument('--skip-blog', action='store_true') - arg_parser.add_argument('--skip-git-log', action='store_true') - arg_parser.add_argument('--skip-docs', action='store_true') - arg_parser.add_argument('--test-only', action='store_true') - arg_parser.add_argument('--minify', action='store_true') - arg_parser.add_argument('--htmlproofer', action='store_true') - arg_parser.add_argument('--no-docs-macros', action='store_true') - arg_parser.add_argument('--save-raw-single-page', type=str) - arg_parser.add_argument('--livereload', type=int, default='0') - arg_parser.add_argument('--verbose', action='store_true') + arg_parser.add_argument("--lang", default="en,ru,zh,ja") + arg_parser.add_argument("--blog-lang", default="en") + arg_parser.add_argument("--docs-dir", default=".") + arg_parser.add_argument("--theme-dir", default=website_dir) + arg_parser.add_argument("--website-dir", default=website_dir) + arg_parser.add_argument("--src-dir", default=src_dir) + arg_parser.add_argument("--blog-dir", default=os.path.join(website_dir, "blog")) + arg_parser.add_argument("--output-dir", default="build") + arg_parser.add_argument("--nav-limit", type=int, default="0") + arg_parser.add_argument("--skip-multi-page", action="store_true") + arg_parser.add_argument("--skip-single-page", action="store_true") + arg_parser.add_argument("--skip-amp", action="store_true") + arg_parser.add_argument("--skip-website", action="store_true") + arg_parser.add_argument("--skip-blog", action="store_true") + arg_parser.add_argument("--skip-git-log", action="store_true") + arg_parser.add_argument("--skip-docs", action="store_true") + 
arg_parser.add_argument("--test-only", action="store_true") + arg_parser.add_argument("--minify", action="store_true") + arg_parser.add_argument("--htmlproofer", action="store_true") + arg_parser.add_argument("--no-docs-macros", action="store_true") + arg_parser.add_argument("--save-raw-single-page", type=str) + arg_parser.add_argument("--livereload", type=int, default="0") + arg_parser.add_argument("--verbose", action="store_true") args = arg_parser.parse_args() args.minify = False # TODO remove logging.basicConfig( - level=logging.DEBUG if args.verbose else logging.INFO, - stream=sys.stderr + level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr ) - logging.getLogger('MARKDOWN').setLevel(logging.INFO) + logging.getLogger("MARKDOWN").setLevel(logging.INFO) - args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs') - args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), 'blog') + args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), "docs") + args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), "blog") from github import get_events - args.rev = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip() - args.rev_short = subprocess.check_output('git rev-parse --short HEAD', shell=True).decode('utf-8').strip() - args.rev_url = f'https://github.com/ClickHouse/ClickHouse/commit/{args.rev}' + + args.rev = ( + subprocess.check_output("git rev-parse HEAD", shell=True) + .decode("utf-8") + .strip() + ) + args.rev_short = ( + subprocess.check_output("git rev-parse --short HEAD", shell=True) + .decode("utf-8") + .strip() + ) + args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}" args.events = get_events(args) if args.test_only: @@ -233,18 +248,20 @@ if __name__ == '__main__': mdx_clickhouse.PatchedMacrosPlugin.skip_git_log = True from build import build + build(args) if args.livereload: - new_args = [arg for arg in sys.argv if not arg.startswith('--livereload')] - new_args = sys.executable + ' ' + ' '.join(new_args) + new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")] + new_args = sys.executable + " " + " ".join(new_args) server = livereload.Server() - server.watch(args.docs_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True)) - server.watch(args.website_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True)) - server.serve( - root=args.output_dir, - host='0.0.0.0', - port=args.livereload + server.watch( + args.docs_dir + "**/*", livereload.shell(new_args, cwd="tools", shell=True) ) + server.watch( + args.website_dir + "**/*", + livereload.shell(new_args, cwd="tools", shell=True), + ) + server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload) sys.exit(0) diff --git a/docs/tools/cmake_in_clickhouse_generator.py b/docs/tools/cmake_in_clickhouse_generator.py index aa4cbbddd18..9bbc94fd206 100644 --- a/docs/tools/cmake_in_clickhouse_generator.py +++ b/docs/tools/cmake_in_clickhouse_generator.py @@ -6,11 +6,13 @@ from typing import TextIO, List, Tuple, Optional, Dict Entity = Tuple[str, str, str] # https://regex101.com/r/R6iogw/12 -cmake_option_regex: str = r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$" +cmake_option_regex: str = ( + r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$" +) ch_master_url: str = "https://github.com/clickhouse/clickhouse/blob/master/" -name_str: str = "[`{name}`](" + ch_master_url + "{path}#L{line})" +name_str: str = '[`{name}`](' + 
ch_master_url + "{path}#L{line})" default_anchor_str: str = "[`{name}`](#{anchor})" comment_var_regex: str = r"\${(.+)}" @@ -27,11 +29,15 @@ entities: Dict[str, Tuple[str, str]] = {} def make_anchor(t: str) -> str: - return "".join(["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"]) + return "".join( + ["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"] + ) + def process_comment(comment: str) -> str: return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE) + def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None: (line, comment) = line_comment (name, description, default) = entity @@ -47,22 +53,22 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No formatted_default: str = "`" + default + "`" formatted_name: str = name_str.format( - anchor=make_anchor(name), - name=name, - path=path, - line=line) + anchor=make_anchor(name), name=name, path=path, line=line + ) formatted_description: str = "".join(description.split("\n")) formatted_comment: str = process_comment(comment) formatted_entity: str = "| {} | {} | {} | {} |".format( - formatted_name, formatted_default, formatted_description, formatted_comment) + formatted_name, formatted_default, formatted_description, formatted_comment + ) entities[name] = path, formatted_entity + def process_file(root_path: str, file_path: str, file_name: str) -> None: - with open(os.path.join(file_path, file_name), 'r') as cmake_file: + with open(os.path.join(file_path, file_name), "r") as cmake_file: contents: str = cmake_file.read() def get_line_and_comment(target: str) -> Tuple[int, str]: @@ -70,10 +76,10 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None: comment: str = "" for n, line in enumerate(contents_list): - if 'option' not in line.lower() or target not in line: + if "option" not in line.lower() or target not in line: continue - for maybe_comment_line in contents_list[n - 1::-1]: + for maybe_comment_line in contents_list[n - 1 :: -1]: if not re.match("\s*#\s*", maybe_comment_line): break @@ -82,16 +88,22 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None: # line numbering starts with 1 return n + 1, comment - matches: Optional[List[Entity]] = re.findall(cmake_option_regex, contents, re.MULTILINE) + matches: Optional[List[Entity]] = re.findall( + cmake_option_regex, contents, re.MULTILINE + ) - - file_rel_path_with_name: str = os.path.join(file_path[len(root_path):], file_name) - if file_rel_path_with_name.startswith('/'): + file_rel_path_with_name: str = os.path.join( + file_path[len(root_path) :], file_name + ) + if file_rel_path_with_name.startswith("/"): file_rel_path_with_name = file_rel_path_with_name[1:] if matches: for entity in matches: - build_entity(file_rel_path_with_name, entity, get_line_and_comment(entity[0])) + build_entity( + file_rel_path_with_name, entity, get_line_and_comment(entity[0]) + ) + def process_folder(root_path: str, name: str) -> None: for root, _, files in os.walk(os.path.join(root_path, name)): @@ -99,12 +111,19 @@ def process_folder(root_path: str, name: str) -> None: if f == "CMakeLists.txt" or ".cmake" in f: process_file(root_path, root, f) -def generate_cmake_flags_files() -> None: - root_path: str = os.path.join(os.path.dirname(__file__), '..', '..') - output_file_name: str = os.path.join(root_path, "docs/en/development/cmake-in-clickhouse.md") - header_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_header.md") - 
footer_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_footer.md") +def generate_cmake_flags_files() -> None: + root_path: str = os.path.join(os.path.dirname(__file__), "..", "..") + + output_file_name: str = os.path.join( + root_path, "docs/en/development/cmake-in-clickhouse.md" + ) + header_file_name: str = os.path.join( + root_path, "docs/_includes/cmake_in_clickhouse_header.md" + ) + footer_file_name: str = os.path.join( + root_path, "docs/_includes/cmake_in_clickhouse_footer.md" + ) process_file(root_path, root_path, "CMakeLists.txt") process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt") @@ -127,8 +146,10 @@ def generate_cmake_flags_files() -> None: f.write(entities[k][1] + "\n") ignored_keys.append(k) - f.write("\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n" + - table_header) + f.write( + "\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n" + + table_header + ) for k in sorted_keys: if k.startswith("ENABLE_") and ".cmake" in entities[k][0]: @@ -143,15 +164,18 @@ def generate_cmake_flags_files() -> None: with open(footer_file_name, "r") as footer: f.write(footer.read()) - other_languages = ["docs/ja/development/cmake-in-clickhouse.md", - "docs/zh/development/cmake-in-clickhouse.md", - "docs/ru/development/cmake-in-clickhouse.md"] + other_languages = [ + "docs/ja/development/cmake-in-clickhouse.md", + "docs/zh/development/cmake-in-clickhouse.md", + "docs/ru/development/cmake-in-clickhouse.md", + ] for lang in other_languages: other_file_name = os.path.join(root_path, lang) if os.path.exists(other_file_name): - os.unlink(other_file_name) + os.unlink(other_file_name) os.symlink(output_file_name, other_file_name) -if __name__ == '__main__': + +if __name__ == "__main__": generate_cmake_flags_files() diff --git a/docs/tools/easy_diff.py b/docs/tools/easy_diff.py index 22d305d3da3..14e3ca91776 100755 --- a/docs/tools/easy_diff.py +++ b/docs/tools/easy_diff.py @@ -8,7 +8,7 @@ import contextlib from git import cmd from tempfile import NamedTemporaryFile -SCRIPT_DESCRIPTION = ''' +SCRIPT_DESCRIPTION = """ usage: ./easy_diff.py language/document path Show the difference between a language document and an English document. 
@@ -53,16 +53,16 @@ SCRIPT_DESCRIPTION = ''' OPTIONS: -h, --help show this help message and exit --no-pager use stdout as difference result output -''' +""" SCRIPT_PATH = os.path.abspath(__file__) -CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), '..', '..') +CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), "..", "..") SCRIPT_COMMAND_EXECUTOR = cmd.Git(CLICKHOUSE_REPO_HOME) SCRIPT_COMMAND_PARSER = argparse.ArgumentParser(add_help=False) -SCRIPT_COMMAND_PARSER.add_argument('path', type=bytes, nargs='?', default=None) -SCRIPT_COMMAND_PARSER.add_argument('--no-pager', action='store_true', default=False) -SCRIPT_COMMAND_PARSER.add_argument('-h', '--help', action='store_true', default=False) +SCRIPT_COMMAND_PARSER.add_argument("path", type=bytes, nargs="?", default=None) +SCRIPT_COMMAND_PARSER.add_argument("--no-pager", action="store_true", default=False) +SCRIPT_COMMAND_PARSER.add_argument("-h", "--help", action="store_true", default=False) def execute(commands): @@ -70,19 +70,41 @@ def execute(commands): def get_hash(file_name): - return execute(['git', 'log', '-n', '1', '--pretty=format:"%H"', file_name]) + return execute(["git", "log", "-n", "1", '--pretty=format:"%H"', file_name]) def diff_file(reference_file, working_file, out): if not os.path.exists(reference_file): - raise RuntimeError('reference file [' + os.path.abspath(reference_file) + '] is not exists.') + raise RuntimeError( + "reference file [" + os.path.abspath(reference_file) + "] does not exist." + ) if os.path.islink(working_file): out.writelines(["Need translate document:" + os.path.abspath(reference_file)]) elif not os.path.exists(working_file): - out.writelines(['Need link document ' + os.path.abspath(reference_file) + ' to ' + os.path.abspath(working_file)]) + out.writelines( + [ + "Need link document " + + os.path.abspath(reference_file) + + " to " + + os.path.abspath(working_file) + ] + ) elif get_hash(working_file) != get_hash(reference_file): - out.writelines([(execute(['git', 'diff', get_hash(working_file).strip('"'), reference_file]).encode('utf-8'))]) + out.writelines( + [ + ( + execute( + [ + "git", + "diff", + get_hash(working_file).strip('"'), + reference_file, + ] + ).encode("utf-8") + ) + ] + ) return 0 @@ -94,20 +116,30 @@ def diff_directory(reference_directory, working_directory, out): for list_item in os.listdir(reference_directory): working_item = os.path.join(working_directory, list_item) reference_item = os.path.join(reference_directory, list_item) - if diff_file(reference_item, working_item, out) if os.path.isfile(reference_item) else diff_directory(reference_item, working_item, out) != 0: + if ( + diff_file(reference_item, working_item, out) + if os.path.isfile(reference_item) + else diff_directory(reference_item, working_item, out) != 0 + ): return 1 return 0 -def find_language_doc(custom_document, other_language='en', children=[]): +def find_language_doc(custom_document, other_language="en", children=[]): if len(custom_document) == 0: - raise RuntimeError('The ' + os.path.join(custom_document, *children) + " is not in docs directory.") + raise RuntimeError( + "The " + + os.path.join(custom_document, *children) + + " is not in the docs directory."
+ ) - if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, 'docs'), custom_document): - return os.path.join(CLICKHOUSE_REPO_HOME, 'docs', other_language, *children[1:]) + if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, "docs"), custom_document): + return os.path.join(CLICKHOUSE_REPO_HOME, "docs", other_language, *children[1:]) children.insert(0, os.path.split(custom_document)[1]) - return find_language_doc(os.path.split(custom_document)[0], other_language, children) + return find_language_doc( + os.path.split(custom_document)[0], other_language, children + ) class ToPager: @@ -119,7 +151,7 @@ class ToPager: def close(self): self.temp_named_file.flush() - git_pager = execute(['git', 'var', 'GIT_PAGER']) + git_pager = execute(["git", "var", "GIT_PAGER"]) subprocess.check_call([git_pager, self.temp_named_file.name]) self.temp_named_file.close() @@ -135,12 +167,20 @@ class ToStdOut: self.system_stdout_stream = system_stdout_stream -if __name__ == '__main__': +if __name__ == "__main__": arguments = SCRIPT_COMMAND_PARSER.parse_args() if arguments.help or not arguments.path: sys.stdout.write(SCRIPT_DESCRIPTION) sys.exit(0) - working_language = os.path.join(CLICKHOUSE_REPO_HOME, 'docs', arguments.path) - with contextlib.closing(ToStdOut(sys.stdout) if arguments.no_pager else ToPager(NamedTemporaryFile('r+'))) as writer: - exit(diff_directory(find_language_doc(working_language), working_language, writer)) + working_language = os.path.join(CLICKHOUSE_REPO_HOME, "docs", arguments.path) + with contextlib.closing( + ToStdOut(sys.stdout) + if arguments.no_pager + else ToPager(NamedTemporaryFile("r+")) + ) as writer: + exit( + diff_directory( + find_language_doc(working_language), working_language, writer + ) + ) diff --git a/docs/tools/github.py b/docs/tools/github.py index 465695d1512..3a6f155e25d 100644 --- a/docs/tools/github.py +++ b/docs/tools/github.py @@ -16,27 +16,26 @@ import util def get_events(args): events = [] skip = True - with open(os.path.join(args.docs_dir, '..', 'README.md')) as f: + with open(os.path.join(args.docs_dir, "..", "README.md")) as f: for line in f: if skip: - if 'Upcoming Events' in line: + if "Upcoming Events" in line: skip = False else: if not line: continue - line = line.strip().split('](') + line = line.strip().split("](") if len(line) == 2: - tail = line[1].split(') ') - events.append({ - 'signup_link': tail[0], - 'event_name': line[0].replace('* [', ''), - 'event_date': tail[1].replace('on ', '').replace('.', '') - }) + tail = line[1].split(") ") + events.append( + { + "signup_link": tail[0], + "event_name": line[0].replace("* [", ""), + "event_date": tail[1].replace("on ", "").replace(".", ""), + } + ) return events -if __name__ == '__main__': - logging.basicConfig( - level=logging.DEBUG, - stream=sys.stderr - ) +if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG, stream=sys.stderr) diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py index 18ecc890b6e..6b5a5bb5813 100755 --- a/docs/tools/mdx_clickhouse.py +++ b/docs/tools/mdx_clickhouse.py @@ -16,74 +16,79 @@ import slugify as slugify_impl def slugify(value, separator): - return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True) + return slugify_impl.slugify( + value, separator=separator, word_boundary=True, save_order=True + ) MARKDOWN_EXTENSIONS = [ - 'mdx_clickhouse', - 'admonition', - 'attr_list', - 'def_list', - 'codehilite', - 'nl2br', - 'sane_lists', - 'pymdownx.details', - 'pymdownx.magiclink', - 'pymdownx.superfences', - 
'extra', - { - 'toc': { - 'permalink': True, - 'slugify': slugify - } - } + "mdx_clickhouse", + "admonition", + "attr_list", + "def_list", + "codehilite", + "nl2br", + "sane_lists", + "pymdownx.details", + "pymdownx.magiclink", + "pymdownx.superfences", + "extra", + {"toc": {"permalink": True, "slugify": slugify}}, ] class ClickHouseLinkMixin(object): - def handleMatch(self, m, data): - single_page = (os.environ.get('SINGLE_PAGE') == '1') + single_page = os.environ.get("SINGLE_PAGE") == "1" try: el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data) except IndexError: return if el is not None: - href = el.get('href') or '' - is_external = href.startswith('http:') or href.startswith('https:') + href = el.get("href") or "" + is_external = href.startswith("http:") or href.startswith("https:") if is_external: - if not href.startswith('https://clickhouse.com'): - el.set('rel', 'external nofollow noreferrer') + if not href.startswith("https://clickhouse.com"): + el.set("rel", "external nofollow noreferrer") elif single_page: - if '#' in href: - el.set('href', '#' + href.split('#', 1)[1]) + if "#" in href: + el.set("href", "#" + href.split("#", 1)[1]) else: - el.set('href', '#' + href.replace('/index.md', '/').replace('.md', '/')) + el.set( + "href", "#" + href.replace("/index.md", "/").replace(".md", "/") + ) return el, start, end -class ClickHouseAutolinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor): +class ClickHouseAutolinkPattern( + ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor +): pass -class ClickHouseLinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor): +class ClickHouseLinkPattern( + ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor +): pass class ClickHousePreprocessor(markdown.util.Processor): def run(self, lines): for line in lines: - if '' not in line: + if "" not in line: yield line class ClickHouseMarkdown(markdown.extensions.Extension): - def extendMarkdown(self, md, md_globals): - md.preprocessors['clickhouse'] = ClickHousePreprocessor() - md.inlinePatterns['link'] = ClickHouseLinkPattern(markdown.inlinepatterns.LINK_RE, md) - md.inlinePatterns['autolink'] = ClickHouseAutolinkPattern(markdown.inlinepatterns.AUTOLINK_RE, md) + md.preprocessors["clickhouse"] = ClickHousePreprocessor() + md.inlinePatterns["link"] = ClickHouseLinkPattern( + markdown.inlinepatterns.LINK_RE, md + ) + md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern( + markdown.inlinepatterns.AUTOLINK_RE, md + ) def makeExtension(**kwargs): @@ -92,10 +97,8 @@ def makeExtension(**kwargs): def get_translations(dirname, lang): import babel.support - return babel.support.Translations.load( - dirname=dirname, - locales=[lang, 'en'] - ) + + return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"]) class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): @@ -104,22 +107,22 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): def on_config(self, config): super(PatchedMacrosPlugin, self).on_config(config) - self.env.comment_start_string = '{##' - self.env.comment_end_string = '##}' - self.env.loader = jinja2.FileSystemLoader([ - os.path.join(config.data['site_dir']), - os.path.join(config.data['extra']['includes_dir']) - ]) + self.env.comment_start_string = "{##" + self.env.comment_end_string = "##}" + self.env.loader = jinja2.FileSystemLoader( + [ + os.path.join(config.data["site_dir"]), + os.path.join(config.data["extra"]["includes_dir"]), + ] + ) def on_env(self, env, config, files): 
import util - env.add_extension('jinja2.ext.i18n') - dirname = os.path.join(config.data['theme'].dirs[0], 'locale') - lang = config.data['theme']['language'] - env.install_gettext_translations( - get_translations(dirname, lang), - newstyle=True - ) + + env.add_extension("jinja2.ext.i18n") + dirname = os.path.join(config.data["theme"].dirs[0], "locale") + lang = config.data["theme"]["language"] + env.install_gettext_translations(get_translations(dirname, lang), newstyle=True) util.init_jinja2_filters(env) return env @@ -130,13 +133,17 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): return markdown def on_page_markdown(self, markdown, page, config, files): - markdown = super(PatchedMacrosPlugin, self).on_page_markdown(markdown, page, config, files) + markdown = super(PatchedMacrosPlugin, self).on_page_markdown( + markdown, page, config, files + ) if os.path.islink(page.file.abs_src_path): - lang = config.data['theme']['language'] - page.canonical_url = page.canonical_url.replace(f'/{lang}/', '/en/', 1) + lang = config.data["theme"]["language"] + page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1) - if config.data['extra'].get('version_prefix') or config.data['extra'].get('single_page'): + if config.data["extra"].get("version_prefix") or config.data["extra"].get( + "single_page" + ): return markdown if self.skip_git_log: return markdown diff --git a/docs/tools/nav.py b/docs/tools/nav.py index db64d1ba404..e3df85bbe4e 100644 --- a/docs/tools/nav.py +++ b/docs/tools/nav.py @@ -10,57 +10,59 @@ import util def find_first_header(content): - for line in content.split('\n'): - if line.startswith('#'): - no_hash = line.lstrip('#') - return no_hash.split('{', 1)[0].strip() + for line in content.split("\n"): + if line.startswith("#"): + no_hash = line.lstrip("#") + return no_hash.split("{", 1)[0].strip() def build_nav_entry(root, args): - if root.endswith('images'): + if root.endswith("images"): return None, None, None result_items = [] - index_meta, index_content = util.read_md_file(os.path.join(root, 'index.md')) - current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title')) - current_title = current_title or index_meta.get('title', find_first_header(index_content)) + index_meta, index_content = util.read_md_file(os.path.join(root, "index.md")) + current_title = index_meta.get("toc_folder_title", index_meta.get("toc_title")) + current_title = current_title or index_meta.get( + "title", find_first_header(index_content) + ) for filename in os.listdir(root): path = os.path.join(root, filename) if os.path.isdir(path): prio, title, payload = build_nav_entry(path, args) if title and payload: result_items.append((prio, title, payload)) - elif filename.endswith('.md'): + elif filename.endswith(".md"): path = os.path.join(root, filename) - meta = '' - content = '' + meta = "" + content = "" try: meta, content = util.read_md_file(path) except: - print('Error in file: {}'.format(path)) + print("Error in file: {}".format(path)) raise - path = path.split('/', 2)[-1] - title = meta.get('toc_title', find_first_header(content)) + path = path.split("/", 2)[-1] + title = meta.get("toc_title", find_first_header(content)) if title: - title = title.strip().rstrip('.') + title = title.strip().rstrip(".") else: - title = meta.get('toc_folder_title', 'hidden') - prio = meta.get('toc_priority', 9999) - logging.debug(f'Nav entry: {prio}, {title}, {path}') - if meta.get('toc_hidden') or not content.strip(): - title = 'hidden' - if title == 'hidden': - title = 'hidden-' + 
hashlib.sha1(content.encode('utf-8')).hexdigest() + title = meta.get("toc_folder_title", "hidden") + prio = meta.get("toc_priority", 9999) + logging.debug(f"Nav entry: {prio}, {title}, {path}") + if meta.get("toc_hidden") or not content.strip(): + title = "hidden" + if title == "hidden": + title = "hidden-" + hashlib.sha1(content.encode("utf-8")).hexdigest() if args.nav_limit and len(result_items) >= args.nav_limit: break result_items.append((prio, title, path)) result_items = sorted(result_items, key=lambda x: (x[0], x[1])) result = collections.OrderedDict([(item[1], item[2]) for item in result_items]) - if index_meta.get('toc_hidden_folder'): - current_title += '|hidden-folder' - return index_meta.get('toc_priority', 10000), current_title, result + if index_meta.get("toc_hidden_folder"): + current_title += "|hidden-folder" + return index_meta.get("toc_priority", 10000), current_title, result def build_docs_nav(lang, args): @@ -70,7 +72,7 @@ def build_docs_nav(lang, args): index_key = None for key, value in list(nav.items()): if key and value: - if value == 'index.md': + if value == "index.md": index_key = key continue result.append({key: value}) @@ -78,7 +80,7 @@ def build_docs_nav(lang, args): break if index_key: key = list(result[0].keys())[0] - result[0][key][index_key] = 'index.md' + result[0][key][index_key] = "index.md" result[0][key].move_to_end(index_key, last=False) return result @@ -86,7 +88,7 @@ def build_docs_nav(lang, args): def build_blog_nav(lang, args): blog_dir = os.path.join(args.blog_dir, lang) years = sorted(os.listdir(blog_dir), reverse=True) - result_nav = [{'hidden': 'index.md'}] + result_nav = [{"hidden": "index.md"}] post_meta = collections.OrderedDict() for year in years: year_dir = os.path.join(blog_dir, year) @@ -97,38 +99,53 @@ def build_blog_nav(lang, args): post_meta_items = [] for post in os.listdir(year_dir): post_path = os.path.join(year_dir, post) - if not post.endswith('.md'): - raise RuntimeError(f'Unexpected non-md file in posts folder: {post_path}') + if not post.endswith(".md"): + raise RuntimeError( + f"Unexpected non-md file in posts folder: {post_path}" + ) meta, _ = util.read_md_file(post_path) - post_date = meta['date'] - post_title = meta['title'] + post_date = meta["date"] + post_title = meta["title"] if datetime.date.fromisoformat(post_date) > datetime.date.today(): continue posts.append( - (post_date, post_title, os.path.join(year, post),) + ( + post_date, + post_title, + os.path.join(year, post), + ) ) if post_title in post_meta: - raise RuntimeError(f'Duplicate post title: {post_title}') - if not post_date.startswith(f'{year}-'): - raise RuntimeError(f'Post date {post_date} doesn\'t match the folder year {year}: {post_title}') - post_url_part = post.replace('.md', '') - post_meta_items.append((post_date, { - 'date': post_date, - 'title': post_title, - 'image': meta.get('image'), - 'url': f'/blog/{lang}/{year}/{post_url_part}/' - },)) + raise RuntimeError(f"Duplicate post title: {post_title}") + if not post_date.startswith(f"{year}-"): + raise RuntimeError( + f"Post date {post_date} doesn't match the folder year {year}: {post_title}" + ) + post_url_part = post.replace(".md", "") + post_meta_items.append( + ( + post_date, + { + "date": post_date, + "title": post_title, + "image": meta.get("image"), + "url": f"/blog/{lang}/{year}/{post_url_part}/", + }, + ) + ) for _, title, path in sorted(posts, reverse=True): result_nav[-1][year][title] = path - for _, post_meta_item in sorted(post_meta_items, - reverse=True, - key=lambda item: item[0]): 
- post_meta[post_meta_item['title']] = post_meta_item + for _, post_meta_item in sorted( + post_meta_items, reverse=True, key=lambda item: item[0] + ): + post_meta[post_meta_item["title"]] = post_meta_item return result_nav, post_meta def _custom_get_navigation(files, config): - nav_config = config['nav'] or mkdocs.structure.nav.nest_paths(f.src_path for f in files.documentation_pages()) + nav_config = config["nav"] or mkdocs.structure.nav.nest_paths( + f.src_path for f in files.documentation_pages() + ) items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config) if not isinstance(items, list): items = [items] @@ -138,19 +155,25 @@ def _custom_get_navigation(files, config): mkdocs.structure.nav._add_previous_and_next_links(pages) mkdocs.structure.nav._add_parent_links(items) - missing_from_config = [file for file in files.documentation_pages() if file.page is None] + missing_from_config = [ + file for file in files.documentation_pages() if file.page is None + ] if missing_from_config: - files._files = [file for file in files._files if file not in missing_from_config] + files._files = [ + file for file in files._files if file not in missing_from_config + ] links = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Link) for link in links: - scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(link.url) + scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse( + link.url + ) if scheme or netloc: mkdocs.structure.nav.log.debug( "An external link to '{}' is included in " "the 'nav' configuration.".format(link.url) ) - elif link.url.startswith('/'): + elif link.url.startswith("/"): mkdocs.structure.nav.log.debug( "An absolute path to '{}' is included in the 'nav' configuration, " "which presumably points to an external resource.".format(link.url) diff --git a/docs/tools/redirects.py b/docs/tools/redirects.py index 1f0a3bb4b74..5d222376683 100644 --- a/docs/tools/redirects.py +++ b/docs/tools/redirects.py @@ -7,8 +7,9 @@ def write_redirect_html(out_path, to_url): os.makedirs(out_dir) except OSError: pass - with open(out_path, 'w') as f: - f.write(f''' + with open(out_path, "w") as f: + f.write( + f""" @@ -22,18 +23,20 @@ def write_redirect_html(out_path, to_url): If you are not redirected automatically, follow this link. 
-''') +""" + ) def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path): out_path = os.path.join( - output_dir, lang, - from_path.replace('/index.md', '/index.html').replace('.md', '/index.html') + output_dir, + lang, + from_path.replace("/index.md", "/index.html").replace(".md", "/index.html"), ) - target_path = to_path.replace('/index.md', '/').replace('.md', '/') + target_path = to_path.replace("/index.md", "/").replace(".md", "/") - if target_path[0:7] != 'http://' and target_path[0:8] != 'https://': - to_url = f'/{base_prefix}/{lang}/{target_path}' + if target_path[0:7] != "http://" and target_path[0:8] != "https://": + to_url = f"/{base_prefix}/{lang}/{target_path}" else: to_url = target_path @@ -42,33 +45,48 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path) def build_docs_redirects(args): - with open(os.path.join(args.docs_dir, 'redirects.txt'), 'r') as f: + with open(os.path.join(args.docs_dir, "redirects.txt"), "r") as f: for line in f: - for lang in args.lang.split(','): - from_path, to_path = line.split(' ', 1) - build_redirect_html(args, 'docs', lang, args.docs_output_dir, from_path, to_path) + for lang in args.lang.split(","): + from_path, to_path = line.split(" ", 1) + build_redirect_html( + args, "docs", lang, args.docs_output_dir, from_path, to_path + ) def build_blog_redirects(args): - for lang in args.blog_lang.split(','): - redirects_path = os.path.join(args.blog_dir, lang, 'redirects.txt') + for lang in args.blog_lang.split(","): + redirects_path = os.path.join(args.blog_dir, lang, "redirects.txt") if os.path.exists(redirects_path): - with open(redirects_path, 'r') as f: + with open(redirects_path, "r") as f: for line in f: - from_path, to_path = line.split(' ', 1) - build_redirect_html(args, 'blog', lang, args.blog_output_dir, from_path, to_path) + from_path, to_path = line.split(" ", 1) + build_redirect_html( + args, "blog", lang, args.blog_output_dir, from_path, to_path + ) def build_static_redirects(args): for static_redirect in [ - ('benchmark.html', '/benchmark/dbms/'), - ('benchmark_hardware.html', '/benchmark/hardware/'), - ('tutorial.html', '/docs/en/getting_started/tutorial/',), - ('reference_en.html', '/docs/en/single/', ), - ('reference_ru.html', '/docs/ru/single/',), - ('docs/index.html', '/docs/en/',), + ("benchmark.html", "/benchmark/dbms/"), + ("benchmark_hardware.html", "/benchmark/hardware/"), + ( + "tutorial.html", + "/docs/en/getting_started/tutorial/", + ), + ( + "reference_en.html", + "/docs/en/single/", + ), + ( + "reference_ru.html", + "/docs/ru/single/", + ), + ( + "docs/index.html", + "/docs/en/", + ), ]: write_redirect_html( - os.path.join(args.output_dir, static_redirect[0]), - static_redirect[1] + os.path.join(args.output_dir, static_redirect[0]), static_redirect[1] ) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 8bf1a5f477c..c48a70b0909 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -10,7 +10,7 @@ cssmin==0.2.0 future==0.18.2 htmlmin==0.1.12 idna==2.10 -Jinja2>=3.0.3 +Jinja2==3.0.3 jinja2-highlight==0.6.1 jsmin==3.0.0 livereload==2.6.3 diff --git a/docs/tools/single_page.py b/docs/tools/single_page.py index 3d32ba30a21..ed285fce9f8 100644 --- a/docs/tools/single_page.py +++ b/docs/tools/single_page.py @@ -12,7 +12,8 @@ import test import util import website -TEMPORARY_FILE_NAME = 'single.md' +TEMPORARY_FILE_NAME = "single.md" + def recursive_values(item): if isinstance(item, dict): @@ -25,11 +26,14 @@ def 
recursive_values(item): yield item -anchor_not_allowed_chars = re.compile(r'[^\w\-]') -def generate_anchor_from_path(path): - return re.sub(anchor_not_allowed_chars, '-', path) +anchor_not_allowed_chars = re.compile(r"[^\w\-]") -absolute_link = re.compile(r'^https?://') + +def generate_anchor_from_path(path): + return re.sub(anchor_not_allowed_chars, "-", path) + + +absolute_link = re.compile(r"^https?://") def replace_link(match, path): @@ -40,46 +44,55 @@ def replace_link(match, path): if re.search(absolute_link, link): return match.group(0) - if link.endswith('/'): - link = link[0:-1] + '.md' + if link.endswith("/"): + link = link[0:-1] + ".md" - return '{}(#{})'.format(title, generate_anchor_from_path(os.path.normpath(os.path.join(os.path.dirname(path), link)))) + return "{}(#{})".format( + title, + generate_anchor_from_path( + os.path.normpath(os.path.join(os.path.dirname(path), link)) + ), + ) # Concatenates Markdown files to a single file. def concatenate(lang, docs_path, single_page_file, nav): lang_path = os.path.join(docs_path, lang) - proj_config = f'{docs_path}/toc_{lang}.yml' + proj_config = f"{docs_path}/toc_{lang}.yml" if os.path.exists(proj_config): with open(proj_config) as cfg_file: - nav = yaml.full_load(cfg_file.read())['nav'] + nav = yaml.full_load(cfg_file.read())["nav"] files_to_concatenate = list(recursive_values(nav)) files_count = len(files_to_concatenate) - logging.info(f'{files_count} files will be concatenated into single md-file for {lang}.') - logging.debug('Concatenating: ' + ', '.join(files_to_concatenate)) - assert files_count > 0, f'Empty single-page for {lang}' + logging.info( + f"{files_count} files will be concatenated into single md-file for {lang}." + ) + logging.debug("Concatenating: " + ", ".join(files_to_concatenate)) + assert files_count > 0, f"Empty single-page for {lang}" - link_regexp = re.compile(r'(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)') + link_regexp = re.compile(r"(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)") for path in files_to_concatenate: try: with open(os.path.join(lang_path, path)) as f: # Insert a horizontal ruler. Then insert an anchor that we will link to. Its name will be a path to the .md file. - single_page_file.write('\n______\n\n' % generate_anchor_from_path(path)) + single_page_file.write( + '\n______\n\n' % generate_anchor_from_path(path) + ) in_metadata = False for line in f: # Skip YAML metadata. - if line == '---\n': + if line == "---\n": in_metadata = not in_metadata continue if not in_metadata: # Increase the level of headers. - if line.startswith('#'): - line = '#' + line + if line.startswith("#"): + line = "#" + line # Replace links within the docs. 
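To make the link-rewriting step above concrete, here is a small, self-contained Python demonstration of the same regex and anchor logic. The `anchor_not_allowed_chars`, `link_regexp`, and `generate_anchor_from_path` definitions are copied from this file; the sample page path and link are made up, and `rewrite` only mirrors the visible part of `replace_link`:

``` python
import os
import re

# Definitions copied from docs/tools/single_page.py above.
anchor_not_allowed_chars = re.compile(r"[^\w\-]")
link_regexp = re.compile(r"(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)")


def generate_anchor_from_path(path):
    return re.sub(anchor_not_allowed_chars, "-", path)


# A made-up page and a relative link inside it.
path = "engines/table-engines/mergetree-family/mergetree.md"
line = "See [TTL](../../../operations/settings/settings.md) for details."


def rewrite(match):
    # Resolve the relative target against the current page, then turn
    # the resulting path into a single-page anchor, as replace_link() does.
    title, link = match.group(1), match.group(2)
    target = os.path.normpath(os.path.join(os.path.dirname(path), link))
    return "{}(#{})".format(title, generate_anchor_from_path(target))


print(link_regexp.sub(rewrite, line))
# -> See [TTL](#operations-settings-settings-md) for details.
```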
@@ -87,14 +100,19 @@ def concatenate(lang, docs_path, single_page_file, nav): line = re.sub( link_regexp, lambda match: replace_link(match, path), - line) + line, + ) # If failed to replace the relative link, print to log # But with some exceptions: # - "../src/" -- for cmake-in-clickhouse.md (link to sources) # - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo" - if '../' in line and (not '../usr/share' in line) and (not '../src/' in line): - logging.info('Failed to resolve relative link:') + if ( + "../" in line + and (not "../usr/share" in line) + and (not "../src/" in line) + ): + logging.info("Failed to resolve relative link:") logging.info(path) logging.info(line) @@ -105,9 +123,11 @@ def concatenate(lang, docs_path, single_page_file, nav): single_page_file.flush() + def get_temporary_file_name(lang, args): return os.path.join(args.docs_dir, lang, TEMPORARY_FILE_NAME) + def remove_temporary_files(lang, args): single_md_path = get_temporary_file_name(lang, args) if os.path.exists(single_md_path): @@ -115,14 +135,14 @@ def remove_temporary_files(lang, args): def build_single_page_version(lang, args, nav, cfg): - logging.info(f'Building single page version for {lang}') - os.environ['SINGLE_PAGE'] = '1' - extra = cfg.data['extra'] - extra['single_page'] = True - extra['is_amp'] = False + logging.info(f"Building single page version for {lang}") + os.environ["SINGLE_PAGE"] = "1" + extra = cfg.data["extra"] + extra["single_page"] = True + extra["is_amp"] = False single_md_path = get_temporary_file_name(lang, args) - with open(single_md_path, 'w') as single_md: + with open(single_md_path, "w") as single_md: concatenate(lang, args.docs_dir, single_md, nav) with util.temp_dir() as site_temp: @@ -132,72 +152,83 @@ def build_single_page_version(lang, args, nav, cfg): shutil.copytree(docs_src_lang, docs_temp_lang) for root, _, filenames in os.walk(docs_temp_lang): for filename in filenames: - if filename != 'single.md' and filename.endswith('.md'): + if filename != "single.md" and filename.endswith(".md"): os.unlink(os.path.join(root, filename)) - cfg.load_dict({ - 'docs_dir': docs_temp_lang, - 'site_dir': site_temp, - 'extra': extra, - 'nav': [ - {cfg.data.get('site_name'): 'single.md'} - ] - }) + cfg.load_dict( + { + "docs_dir": docs_temp_lang, + "site_dir": site_temp, + "extra": extra, + "nav": [{cfg.data.get("site_name"): "single.md"}], + } + ) if not args.test_only: mkdocs.commands.build.build(cfg) - single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single') + single_page_output_path = os.path.join( + args.docs_dir, args.docs_output_dir, lang, "single" + ) if os.path.exists(single_page_output_path): shutil.rmtree(single_page_output_path) shutil.copytree( - os.path.join(site_temp, 'single'), - single_page_output_path + os.path.join(site_temp, "single"), single_page_output_path ) - single_page_index_html = os.path.join(single_page_output_path, 'index.html') - single_page_content_js = os.path.join(single_page_output_path, 'content.js') + single_page_index_html = os.path.join( + single_page_output_path, "index.html" + ) + single_page_content_js = os.path.join( + single_page_output_path, "content.js" + ) - with open(single_page_index_html, 'r') as f: - sp_prefix, sp_js, sp_suffix = f.read().split('') + with open(single_page_index_html, "r") as f: + sp_prefix, sp_js, sp_suffix = f.read().split("") - with open(single_page_index_html, 'w') as f: + with open(single_page_index_html, "w") as f: f.write(sp_prefix) f.write(sp_suffix) - with 
open(single_page_content_js, 'w') as f: + with open(single_page_content_js, "w") as f: if args.minify: import jsmin + sp_js = jsmin.jsmin(sp_js) f.write(sp_js) - logging.info(f'Re-building single page for {lang} pdf/test') + logging.info(f"Re-building single page for {lang} pdf/test") with util.temp_dir() as test_dir: - extra['single_page'] = False - cfg.load_dict({ - 'docs_dir': docs_temp_lang, - 'site_dir': test_dir, - 'extra': extra, - 'nav': [ - {cfg.data.get('site_name'): 'single.md'} - ] - }) + extra["single_page"] = False + cfg.load_dict( + { + "docs_dir": docs_temp_lang, + "site_dir": test_dir, + "extra": extra, + "nav": [{cfg.data.get("site_name"): "single.md"}], + } + ) mkdocs.commands.build.build(cfg) - css_in = ' '.join(website.get_css_in(args)) - js_in = ' '.join(website.get_js_in(args)) - subprocess.check_call(f'cat {css_in} > {test_dir}/css/base.css', shell=True) - subprocess.check_call(f'cat {js_in} > {test_dir}/js/base.js', shell=True) + css_in = " ".join(website.get_css_in(args)) + js_in = " ".join(website.get_js_in(args)) + subprocess.check_call( + f"cat {css_in} > {test_dir}/css/base.css", shell=True + ) + subprocess.check_call( + f"cat {js_in} > {test_dir}/js/base.js", shell=True + ) if args.save_raw_single_page: shutil.copytree(test_dir, args.save_raw_single_page) - logging.info(f'Running tests for {lang}') + logging.info(f"Running tests for {lang}") test.test_single_page( - os.path.join(test_dir, 'single', 'index.html'), lang) + os.path.join(test_dir, "single", "index.html"), lang + ) - logging.info(f'Finished building single page version for {lang}') + logging.info(f"Finished building single page version for {lang}") remove_temporary_files(lang, args) diff --git a/docs/tools/test.py b/docs/tools/test.py index 1ea07c45192..d0469d042ee 100755 --- a/docs/tools/test.py +++ b/docs/tools/test.py @@ -8,14 +8,11 @@ import subprocess def test_single_page(input_path, lang): - if not (lang == 'en'): + if not (lang == "en"): return with open(input_path) as f: - soup = bs4.BeautifulSoup( - f, - features='html.parser' - ) + soup = bs4.BeautifulSoup(f, features="html.parser") anchor_points = set() @@ -23,30 +20,27 @@ def test_single_page(input_path, lang): links_to_nowhere = 0 for tag in soup.find_all(): - for anchor_point in [tag.attrs.get('name'), tag.attrs.get('id')]: + for anchor_point in [tag.attrs.get("name"), tag.attrs.get("id")]: if anchor_point: anchor_points.add(anchor_point) for tag in soup.find_all(): - href = tag.attrs.get('href') - if href and href.startswith('#') and href != '#': + href = tag.attrs.get("href") + if href and href.startswith("#") and href != "#": if href[1:] not in anchor_points: links_to_nowhere += 1 logging.info("Tag %s", tag) - logging.info('Link to nowhere: %s' % href) + logging.info("Link to nowhere: %s" % href) if links_to_nowhere: - logging.error(f'Found {links_to_nowhere} links to nowhere in {lang}') + logging.error(f"Found {links_to_nowhere} links to nowhere in {lang}") sys.exit(1) if len(anchor_points) <= 10: - logging.error('Html parsing is probably broken') + logging.error("Html parsing is probably broken") sys.exit(1) -if __name__ == '__main__': - logging.basicConfig( - level=logging.DEBUG, - stream=sys.stderr - ) +if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG, stream=sys.stderr) test_single_page(sys.argv[1], sys.argv[2]) diff --git a/docs/tools/util.py b/docs/tools/util.py index 25961561f99..fb2f135c85e 100644 --- a/docs/tools/util.py +++ b/docs/tools/util.py @@ -15,7 +15,7 @@ import yaml @contextlib.contextmanager 
def temp_dir(): - path = tempfile.mkdtemp(dir=os.environ.get('TEMP')) + path = tempfile.mkdtemp(dir=os.environ.get("TEMP")) try: yield path finally: @@ -34,7 +34,7 @@ def cd(new_cwd): def get_free_port(): with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: - s.bind(('', 0)) + s.bind(("", 0)) s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) return s.getsockname()[1] @@ -61,12 +61,12 @@ def read_md_file(path): meta_text = [] content = [] if os.path.exists(path): - with open(path, 'r') as f: + with open(path, "r") as f: for line in f: - if line.startswith('---'): + if line.startswith("---"): if in_meta: in_meta = False - meta = yaml.full_load(''.join(meta_text)) + meta = yaml.full_load("".join(meta_text)) else: in_meta = True else: @@ -74,7 +74,7 @@ def read_md_file(path): meta_text.append(line) else: content.append(line) - return meta, ''.join(content) + return meta, "".join(content) def write_md_file(path, meta, content): @@ -82,13 +82,13 @@ def write_md_file(path, meta, content): if not os.path.exists(dirname): os.makedirs(dirname) - with open(path, 'w') as f: + with open(path, "w") as f: if meta: - print('---', file=f) + print("---", file=f) yaml.dump(meta, f) - print('---', file=f) - if not content.startswith('\n'): - print('', file=f) + print("---", file=f) + if not content.startswith("\n"): + print("", file=f) f.write(content) @@ -100,7 +100,7 @@ def represent_ordereddict(dumper, data): value.append((node_key, node_value)) - return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value) + return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value) yaml.add_representer(collections.OrderedDict, represent_ordereddict) @@ -109,30 +109,31 @@ yaml.add_representer(collections.OrderedDict, represent_ordereddict) def init_jinja2_filters(env): import amp import website + chunk_size = 10240 - env.filters['chunks'] = lambda line: [line[i:i + chunk_size] for i in range(0, len(line), chunk_size)] - env.filters['html_to_amp'] = amp.html_to_amp - env.filters['adjust_markdown_html'] = website.adjust_markdown_html - env.filters['to_rfc882'] = lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').strftime('%a, %d %b %Y %H:%M:%S GMT') + env.filters["chunks"] = lambda line: [ + line[i : i + chunk_size] for i in range(0, len(line), chunk_size) + ] + env.filters["html_to_amp"] = amp.html_to_amp + env.filters["adjust_markdown_html"] = website.adjust_markdown_html + env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime( + d, "%Y-%m-%d" + ).strftime("%a, %d %b %Y %H:%M:%S GMT") def init_jinja2_env(args): import mdx_clickhouse + env = jinja2.Environment( - loader=jinja2.FileSystemLoader([ - args.website_dir, - os.path.join(args.docs_dir, '_includes') - ]), - extensions=[ - 'jinja2.ext.i18n', - 'jinja2_highlight.HighlightExtension' - ] + loader=jinja2.FileSystemLoader( + [args.website_dir, os.path.join(args.docs_dir, "_includes")] + ), + extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"], ) - env.extend(jinja2_highlight_cssclass='syntax p-3 my-3') - translations_dir = os.path.join(args.website_dir, 'locale') + env.extend(jinja2_highlight_cssclass="syntax p-3 my-3") + translations_dir = os.path.join(args.website_dir, "locale") env.install_gettext_translations( - mdx_clickhouse.get_translations(translations_dir, 'en'), - newstyle=True + mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True ) init_jinja2_filters(env) return env diff --git a/docs/tools/website.py b/docs/tools/website.py index de4cc14670c..2c748d96414 100644 --- 
a/docs/tools/website.py +++ b/docs/tools/website.py @@ -17,108 +17,112 @@ import util def handle_iframe(iframe, soup): - allowed_domains = ['https://www.youtube.com/', 'https://datalens.yandex/'] + allowed_domains = ["https://www.youtube.com/", "https://datalens.yandex/"] illegal_domain = True - iframe_src = iframe.attrs['src'] + iframe_src = iframe.attrs["src"] for domain in allowed_domains: if iframe_src.startswith(domain): illegal_domain = False break if illegal_domain: - raise RuntimeError(f'iframe from illegal domain: {iframe_src}') - wrapper = soup.new_tag('div') - wrapper.attrs['class'] = ['embed-responsive', 'embed-responsive-16by9'] + raise RuntimeError(f"iframe from illegal domain: {iframe_src}") + wrapper = soup.new_tag("div") + wrapper.attrs["class"] = ["embed-responsive", "embed-responsive-16by9"] iframe.insert_before(wrapper) iframe.extract() wrapper.insert(0, iframe) - if 'width' in iframe.attrs: - del iframe.attrs['width'] - if 'height' in iframe.attrs: - del iframe.attrs['height'] - iframe.attrs['allow'] = 'accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture' - iframe.attrs['class'] = 'embed-responsive-item' - iframe.attrs['frameborder'] = '0' - iframe.attrs['allowfullscreen'] = '1' + if "width" in iframe.attrs: + del iframe.attrs["width"] + if "height" in iframe.attrs: + del iframe.attrs["height"] + iframe.attrs[ + "allow" + ] = "accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" + iframe.attrs["class"] = "embed-responsive-item" + iframe.attrs["frameborder"] = "0" + iframe.attrs["allowfullscreen"] = "1" def adjust_markdown_html(content): - soup = bs4.BeautifulSoup( - content, - features='html.parser' - ) + soup = bs4.BeautifulSoup(content, features="html.parser") - for a in soup.find_all('a'): - a_class = a.attrs.get('class') - a_href = a.attrs.get('href') - if a_class and 'headerlink' in a_class: - a.string = '\xa0' - if a_href and a_href.startswith('http'): - a.attrs['target'] = '_blank' + for a in soup.find_all("a"): + a_class = a.attrs.get("class") + a_href = a.attrs.get("href") + if a_class and "headerlink" in a_class: + a.string = "\xa0" + if a_href and a_href.startswith("http"): + a.attrs["target"] = "_blank" - for code in soup.find_all('code'): - code_class = code.attrs.get('class') + for code in soup.find_all("code"): + code_class = code.attrs.get("class") if code_class: - code.attrs['class'] = code_class + ['syntax'] + code.attrs["class"] = code_class + ["syntax"] else: - code.attrs['class'] = 'syntax' + code.attrs["class"] = "syntax" - for iframe in soup.find_all('iframe'): + for iframe in soup.find_all("iframe"): handle_iframe(iframe, soup) - for img in soup.find_all('img'): - if img.attrs.get('alt') == 'iframe': - img.name = 'iframe' - img.string = '' + for img in soup.find_all("img"): + if img.attrs.get("alt") == "iframe": + img.name = "iframe" + img.string = "" handle_iframe(img, soup) continue - img_class = img.attrs.get('class') + img_class = img.attrs.get("class") if img_class: - img.attrs['class'] = img_class + ['img-fluid'] + img.attrs["class"] = img_class + ["img-fluid"] else: - img.attrs['class'] = 'img-fluid' + img.attrs["class"] = "img-fluid" - for details in soup.find_all('details'): - for summary in details.find_all('summary'): + for details in soup.find_all("details"): + for summary in details.find_all("summary"): if summary.parent != details: summary.extract() details.insert(0, summary) - for dd in soup.find_all('dd'): - dd_class = dd.attrs.get('class') + for dd in soup.find_all("dd"): + dd_class = 
dd.attrs.get("class") if dd_class: - dd.attrs['class'] = dd_class + ['pl-3'] + dd.attrs["class"] = dd_class + ["pl-3"] else: - dd.attrs['class'] = 'pl-3' + dd.attrs["class"] = "pl-3" - for div in soup.find_all('div'): - div_class = div.attrs.get('class') - is_admonition = div_class and 'admonition' in div.attrs.get('class') + for div in soup.find_all("div"): + div_class = div.attrs.get("class") + is_admonition = div_class and "admonition" in div.attrs.get("class") if is_admonition: - for a in div.find_all('a'): - a_class = a.attrs.get('class') + for a in div.find_all("a"): + a_class = a.attrs.get("class") if a_class: - a.attrs['class'] = a_class + ['alert-link'] + a.attrs["class"] = a_class + ["alert-link"] else: - a.attrs['class'] = 'alert-link' + a.attrs["class"] = "alert-link" - for p in div.find_all('p'): - p_class = p.attrs.get('class') - if is_admonition and p_class and ('admonition-title' in p_class): - p.attrs['class'] = p_class + ['alert-heading', 'display-4', 'text-reset', 'mb-2'] + for p in div.find_all("p"): + p_class = p.attrs.get("class") + if is_admonition and p_class and ("admonition-title" in p_class): + p.attrs["class"] = p_class + [ + "alert-heading", + "display-4", + "text-reset", + "mb-2", + ] if is_admonition: - div.attrs['role'] = 'alert' - if ('info' in div_class) or ('note' in div_class): - mode = 'alert-primary' - elif ('attention' in div_class) or ('warning' in div_class): - mode = 'alert-warning' - elif 'important' in div_class: - mode = 'alert-danger' - elif 'tip' in div_class: - mode = 'alert-info' + div.attrs["role"] = "alert" + if ("info" in div_class) or ("note" in div_class): + mode = "alert-primary" + elif ("attention" in div_class) or ("warning" in div_class): + mode = "alert-warning" + elif "important" in div_class: + mode = "alert-danger" + elif "tip" in div_class: + mode = "alert-info" else: - mode = 'alert-secondary' - div.attrs['class'] = div_class + ['alert', 'pb-0', 'mb-4', mode] + mode = "alert-secondary" + div.attrs["class"] = div_class + ["alert", "pb-0", "mb-4", mode] return str(soup) @@ -128,61 +132,63 @@ def minify_html(content): def build_website(args): - logging.info('Building website') + logging.info("Building website") env = util.init_jinja2_env(args) shutil.copytree( args.website_dir, args.output_dir, ignore=shutil.ignore_patterns( - '*.md', - '*.sh', - '*.css', - '*.json', - 'js/*.js', - 'build', - 'docs', - 'public', - 'node_modules', - 'src', - 'templates', - 'locale', - '.gitkeep' - ) + "*.md", + "*.sh", + "*.css", + "*.json", + "js/*.js", + "build", + "docs", + "public", + "node_modules", + "src", + "templates", + "locale", + ".gitkeep", + ), ) shutil.copytree( - os.path.join(args.website_dir, 'images'), - os.path.join(args.output_dir, 'docs', 'images') + os.path.join(args.website_dir, "images"), + os.path.join(args.output_dir, "docs", "images"), ) # This file can be requested to check for available ClickHouse releases. shutil.copy2( - os.path.join(args.src_dir, 'utils', 'list-versions', 'version_date.tsv'), - os.path.join(args.output_dir, 'data', 'version_date.tsv')) + os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"), + os.path.join(args.output_dir, "data", "version_date.tsv"), + ) # This file can be requested to install ClickHouse. 
shutil.copy2( - os.path.join(args.src_dir, 'docs', '_includes', 'install', 'universal.sh'), - os.path.join(args.output_dir, 'data', 'install.sh')) + os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"), + os.path.join(args.output_dir, "data", "install.sh"), + ) for root, _, filenames in os.walk(args.output_dir): for filename in filenames: - if filename == 'main.html': + if filename == "main.html": continue path = os.path.join(root, filename) - if not filename.endswith('.html'): + if not filename.endswith(".html"): continue - logging.info('Processing %s', path) - with open(path, 'rb') as f: - content = f.read().decode('utf-8') + logging.info("Processing %s", path) + with open(path, "rb") as f: + content = f.read().decode("utf-8") template = env.from_string(content) content = template.render(args.__dict__) - with open(path, 'wb') as f: - f.write(content.encode('utf-8')) + with open(path, "wb") as f: + f.write(content.encode("utf-8")) def get_css_in(args): @@ -193,7 +199,7 @@ def get_css_in(args): f"'{args.website_dir}/css/blog.css'", f"'{args.website_dir}/css/docs.css'", f"'{args.website_dir}/css/highlight.css'", - f"'{args.website_dir}/css/main.css'" + f"'{args.website_dir}/css/main.css'", ] @@ -207,42 +213,41 @@ def get_js_in(args): f"'{args.website_dir}/js/index.js'", f"'{args.website_dir}/js/docsearch.js'", f"'{args.website_dir}/js/docs.js'", - f"'{args.website_dir}/js/main.js'" + f"'{args.website_dir}/js/main.js'", ] def minify_file(path, css_digest, js_digest): - if not ( - path.endswith('.html') or - path.endswith('.css') - ): + if not (path.endswith(".html") or path.endswith(".css")): return - logging.info('Minifying %s', path) - with open(path, 'rb') as f: - content = f.read().decode('utf-8') - if path.endswith('.html'): + logging.info("Minifying %s", path) + with open(path, "rb") as f: + content = f.read().decode("utf-8") + if path.endswith(".html"): content = minify_html(content) - content = content.replace('base.css?css_digest', f'base.css?{css_digest}') - content = content.replace('base.js?js_digest', f'base.js?{js_digest}') -# TODO: restore cssmin -# elif path.endswith('.css'): -# content = cssmin.cssmin(content) -# TODO: restore jsmin -# elif path.endswith('.js'): -# content = jsmin.jsmin(content) - with open(path, 'wb') as f: - f.write(content.encode('utf-8')) + content = content.replace("base.css?css_digest", f"base.css?{css_digest}") + content = content.replace("base.js?js_digest", f"base.js?{js_digest}") + # TODO: restore cssmin + # elif path.endswith('.css'): + # content = cssmin.cssmin(content) + # TODO: restore jsmin + # elif path.endswith('.js'): + # content = jsmin.jsmin(content) + with open(path, "wb") as f: + f.write(content.encode("utf-8")) def minify_website(args): - css_in = ' '.join(get_css_in(args)) - css_out = f'{args.output_dir}/docs/css/base.css' - os.makedirs(f'{args.output_dir}/docs/css') + css_in = " ".join(get_css_in(args)) + css_out = f"{args.output_dir}/docs/css/base.css" + os.makedirs(f"{args.output_dir}/docs/css") if args.minify and False: # TODO: return closure - command = f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " \ + command = ( + f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}" + ) logging.info(css_in) logging.info(command) output = subprocess.check_output(command, shell=True) @@ -251,51 +256,60 @@ def minify_website(args): else: command = f"cat {css_in}" output = 
subprocess.check_output(command, shell=True) - with open(css_out, 'wb+') as f: + with open(css_out, "wb+") as f: f.write(output) - with open(css_out, 'rb') as f: + with open(css_out, "rb") as f: css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8] - js_in = ' '.join(get_js_in(args)) - js_out = f'{args.output_dir}/docs/js/base.js' - os.makedirs(f'{args.output_dir}/docs/js') + js_in = " ".join(get_js_in(args)) + js_out = f"{args.output_dir}/docs/js/base.js" + os.makedirs(f"{args.output_dir}/docs/js") if args.minify and False: # TODO: return closure js_in = [js[1:-1] for js in js_in] closure_args = [ - '--js', *js_in, '--js_output_file', js_out, - '--compilation_level', 'SIMPLE', - '--dependency_mode', 'NONE', - '--third_party', '--use_types_for_optimization', - '--isolation_mode', 'IIFE' + "--js", + *js_in, + "--js_output_file", + js_out, + "--compilation_level", + "SIMPLE", + "--dependency_mode", + "NONE", + "--third_party", + "--use_types_for_optimization", + "--isolation_mode", + "IIFE", ] logging.info(closure_args) if closure.run(*closure_args): - raise RuntimeError('failed to run closure compiler') - with open(js_out, 'r') as f: + raise RuntimeError("failed to run closure compiler") + with open(js_out, "r") as f: js_content = jsmin.jsmin(f.read()) - with open(js_out, 'w') as f: + with open(js_out, "w") as f: f.write(js_content) else: command = f"cat {js_in}" output = subprocess.check_output(command, shell=True) - with open(js_out, 'wb+') as f: + with open(js_out, "wb+") as f: f.write(output) - with open(js_out, 'rb') as f: + with open(js_out, "rb") as f: js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8] logging.info(js_digest) if args.minify: - logging.info('Minifying website') + logging.info("Minifying website") with concurrent.futures.ThreadPoolExecutor() as executor: futures = [] for root, _, filenames in os.walk(args.output_dir): for filename in filenames: path = os.path.join(root, filename) - futures.append(executor.submit(minify_file, path, css_digest, js_digest)) + futures.append( + executor.submit(minify_file, path, css_digest, js_digest) + ) for future in futures: exc = future.exception() if exc: @@ -304,24 +318,28 @@ def minify_website(args): def process_benchmark_results(args): - benchmark_root = os.path.join(args.website_dir, 'benchmark') + benchmark_root = os.path.join(args.website_dir, "benchmark") required_keys = { - 'dbms': ['result'], - 'hardware': ['result', 'system', 'system_full', 'kind'] + "dbms": ["result"], + "hardware": ["result", "system", "system_full", "kind"], } - for benchmark_kind in ['dbms', 'hardware']: + for benchmark_kind in ["dbms", "hardware"]: results = [] - results_root = os.path.join(benchmark_root, benchmark_kind, 'results') + results_root = os.path.join(benchmark_root, benchmark_kind, "results") for result in sorted(os.listdir(results_root)): result_file = os.path.join(results_root, result) - logging.debug(f'Reading benchmark result from {result_file}') - with open(result_file, 'r') as f: + logging.debug(f"Reading benchmark result from {result_file}") + with open(result_file, "r") as f: result = json.loads(f.read()) for item in result: for required_key in required_keys[benchmark_kind]: - assert required_key in item, f'No "{required_key}" in {result_file}' + assert ( + required_key in item + ), f'No "{required_key}" in {result_file}' results += result - results_js = os.path.join(args.output_dir, 'benchmark', benchmark_kind, 'results.js') - with open(results_js, 'w') as f: + results_js = os.path.join( + args.output_dir, "benchmark", 
benchmark_kind, "results.js" + ) + with open(results_js, "w") as f: data = json.dumps(results) - f.write(f'var results = {data};') + f.write(f"var results = {data};") diff --git a/docs/zh/development/continuous-integration.md b/docs/zh/development/continuous-integration.md index 4f37b6f88c7..5bebb3aec2a 100644 --- a/docs/zh/development/continuous-integration.md +++ b/docs/zh/development/continuous-integration.md @@ -42,6 +42,8 @@ git push 使用`utils/check-style/check-style`二进制文件执行一些简单的基于正则表达式的代码样式检查(注意, 它可以在本地运行). 如果失败, 按照[代码样式指南](./style.md)修复样式错误. +使用 [black](https://github.com/psf/black/) 检查 python 代码. + ### 报告详情 {#report-details} - [状态页示例](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html) - `docs_output.txt`记录了检查结果错误(无效表格等), 空白页表示没有错误. [成功结果案例](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt) diff --git a/docs/zh/development/developer-instruction.md b/docs/zh/development/developer-instruction.md index bd7a197f926..7ade3ad57fb 100644 --- a/docs/zh/development/developer-instruction.md +++ b/docs/zh/development/developer-instruction.md @@ -259,7 +259,7 @@ ClickHouse的架构描述可以在此处查看:https://clickhouse.com/docs/en/ 即使工作尚未完成,也可以创建拉取请求。在这种情况下,请在标题的开头加上«WIP»(正在进行中),以便后续更改。这对于协同审查和讨论更改以及运行所有可用测试用例很有用。提供有关变更的简短描述很重要,这将在后续用于生成重新发布变更日志。 -Yandex成员一旦在您的拉取请求上贴上«可以测试»标签,就会开始测试。一些初始检查项(例如,代码类型)的结果会在几分钟内反馈。构建的检查结果将在半小时内完成。而主要的测试用例集结果将在一小时内报告给您。 +ClickHouse成员一旦在您的拉取请求上贴上«可以测试»标签,就会开始测试。一些初始检查项(例如,代码类型)的结果会在几分钟内反馈。构建的检查结果将在半小时内完成。而主要的测试用例集结果将在一小时内报告给您。 系统将分别为您的拉取请求准备ClickHouse二进制版本。若要检索这些构建信息,请在检查列表中单击« ClickHouse构建检查»旁边的«详细信息»链接。在这里,您会找到指向ClickHouse的.deb软件包的直接链接,此外,甚至可以将其部署在生产服务器上(如果您不担心)。 diff --git a/docs/zh/engines/table-engines/integrations/hive.md b/docs/zh/engines/table-engines/integrations/hive.md index aa2c82d902a..24e0834d2fc 100644 --- a/docs/zh/engines/table-engines/integrations/hive.md +++ b/docs/zh/engines/table-engines/integrations/hive.md @@ -140,7 +140,7 @@ CREATE TABLE test.test_orc `f_array_array_float` Array(Array(Float32)), `day` String ) -ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc') +ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc') PARTITION BY day ``` diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index b81d2206bf4..f5f2c428ea7 100644 --- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -8,7 +8,7 @@ toc_title: "版本折叠MergeTree" 这个引擎: - 允许快速写入不断变化的对象状态。 -- 删除后台中的旧对象状态。 这显着降低了存储体积。 +- 删除后台中的旧对象状态。 这显著降低了存储体积。 请参阅部分 [崩溃](#table_engines_versionedcollapsingmergetree) 有关详细信息。 diff --git a/docs/zh/getting-started/playground.md b/docs/zh/getting-started/playground.md index 33636c92829..f8f611d9d8d 100644 --- a/docs/zh/getting-started/playground.md +++ b/docs/zh/getting-started/playground.md @@ -3,62 +3,41 @@ toc_priority: 14 toc_title: 体验平台 --- -# ClickHouse体验平台 {#clickhouse-playground} +# ClickHouse Playground {#clickhouse-playground} -!!! warning "Warning" - This service is deprecated and will be replaced in foreseeable future. +[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their own server or cluster. +Several example datasets are available in Playground.
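As an aside on the new Playground text above (not part of the upstream patch): the anonymous HTTPS endpoint documented below accepts plain HTTP POSTs, so the `curl` example further down translates directly to other clients. A minimal sketch in Python, assuming the third-party `requests` package and the read-only `explorer` user from the credentials table:

``` python
import requests

# Anonymous, read-only query against the public Playground.
response = requests.post(
    "https://play.clickhouse.com/",
    params={"user": "explorer"},
    data="SELECT 'Play ClickHouse'",
    timeout=10,
)
response.raise_for_status()
print(response.text)
```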
-[ClickHouse体验平台](https://play.clickhouse.com?file=welcome) 允许人们通过即时运行查询来尝试ClickHouse,而无需设置他们的服务器或集群。 - -体验平台中提供几个示例数据集以及显示ClickHouse特性的示例查询。还有一些ClickHouse LTS版本可供尝试。 - -您可以使用任何HTTP客户端对ClickHouse体验平台进行查询,例如[curl](https://curl.haxx.se)或者[wget](https://www.gnu.org/software/wget/),或使用[JDBC](../interfaces/jdbc.md)或者[ODBC](../interfaces/odbc.md)驱动连接。关于支持ClickHouse的软件产品的更多信息详见[here](../interfaces/index.md). +You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). ## Credentials {#credentials} -| 参数 | 值 | -|:--------------------|:----------------------------------------| -| HTTPS端点 | `https://play-api.clickhouse.com:8443` | -| TCP端点 | `play-api.clickhouse.com:9440` | -| 用户 | `playground` | -| 密码 | `clickhouse` | +| Parameter | Value | +|:--------------------|:-----------------------------------| +| HTTPS endpoint | `https://play.clickhouse.com:443/` | +| Native TCP endpoint | `play.clickhouse.com:9440` | +| User | `explorer` or `play` | +| Password | (empty) | -还有一些带有特定ClickHouse版本的附加信息来试验它们之间的差异(端口和用户/密码与上面相同): +## Limitations {#limitations} -- 20.3 LTS: `play-api-v20-3.clickhouse.com` -- 19.14 LTS: `play-api-v19-14.clickhouse.com` +The queries are executed as a read-only user. It implies some limitations: -!!! note "注意" - 所有这些端点都需要安全的TLS连接。 +- DDL queries are not allowed +- INSERT queries are not allowed -## 查询限制 {#limitations} +The service also has quotas on its usage. -查询以只读用户身份执行。 这意味着一些局限性: +## Examples {#examples} -- 不允许DDL查询 -- 不允许插入查询 - -还强制执行以下设置: -- [max_result_bytes=10485760](../operations/settings/query-complexity/#max-result-bytes) -- [max_result_rows=2000](../operations/settings/query-complexity/#setting-max_result_rows) -- [result_overflow_mode=break](../operations/settings/query-complexity/#result-overflow-mode) -- [max_execution_time=60000](../operations/settings/query-complexity/#max-execution-time) - -ClickHouse体验还有如下: -[ClickHouse管理服务](https://cloud.yandex.com/services/managed-clickhouse) -实例托管 [Yandex云](https://cloud.yandex.com/)。 -更多信息 [云提供商](../commercial/cloud.md)。 - -## 示例 {#examples} - -使用`curl`连接Https服务: +HTTPS endpoint example with `curl`: ``` bash -curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets" +curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'" ``` -TCP连接示例[CLI](../interfaces/cli.md): +TCP endpoint example with [CLI](../interfaces/cli.md): ``` bash -clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" +clickhouse client --secure --host play.clickhouse.com --user explorer ``` diff --git a/docs/zh/interfaces/formats.md b/docs/zh/interfaces/formats.md index 4327a657793..40e9bfe7ff1 100644 --- a/docs/zh/interfaces/formats.md +++ b/docs/zh/interfaces/formats.md @@ -1240,7 +1240,8 @@ SELECT * FROM topic1_stream; | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `DATE64` |
[DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `STRING` | | — | [FixedString](../sql-reference/data-types/fixedstring.md) | `STRING` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | @@ -1295,7 +1296,8 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_ | `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP` | [DateTime64](../sql-reference/data-types/datetime64.md) | `TIMESTAMP` | | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | | `-` | [Array](../sql-reference/data-types/array.md) | `LIST` | diff --git a/docs/zh/operations/system-tables/functions.md b/docs/zh/operations/system-tables/functions.md index 695c7b7fee1..75df1f65c1f 100644 --- a/docs/zh/operations/system-tables/functions.md +++ b/docs/zh/operations/system-tables/functions.md @@ -15,7 +15,7 @@ ``` ┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐ │ sumburConsistentHash │ 0 │ 0 │ │ -│ yandexConsistentHash │ 0 │ 0 │ │ +│ kostikConsistentHash │ 0 │ 0 │ │ │ demangle │ 0 │ 0 │ │ │ addressToLine │ 0 │ 0 │ │ │ JSONExtractRaw │ 0 │ 0 │ │ diff --git a/docs/zh/sql-reference/functions/ext-dict-functions.md b/docs/zh/sql-reference/functions/ext-dict-functions.md index 12b9499cb64..87e19dc0119 100644 --- a/docs/zh/sql-reference/functions/ext-dict-functions.md +++ b/docs/zh/sql-reference/functions/ext-dict-functions.md @@ -31,7 +31,7 @@ - 对于’dict_name’分层字典,查找’child_id’键是否位于’ancestor_id’内(或匹配’ancestor_id’)。返回UInt8。 -## 独裁主义 {#dictgethierarchy} +## dictGetHierarchy {#dictgethierarchy} `dictGetHierarchy('dict_name', id)` diff --git a/packages/.gitignore b/packages/.gitignore new file mode 100644 index 00000000000..355164c1265 --- /dev/null +++ b/packages/.gitignore @@ -0,0 +1 @@ +*/ diff --git a/packages/build b/packages/build new file mode 100755 index 00000000000..53a7538f80e --- /dev/null +++ b/packages/build @@ -0,0 +1,156 @@ +#!/usr/bin/env bash + +set -e + +# Avoid dependency on locale +LC_ALL=C + +# Normalize output directory +if [ -n "$OUTPUT_DIR" ]; then + OUTPUT_DIR=$(realpath -m "$OUTPUT_DIR") +fi + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +cd "$CUR_DIR" + +ROOT_DIR=$(readlink -f "$(git rev-parse --show-cdup)") + +PKG_ROOT='root' + +DEB_ARCH=${DEB_ARCH:-amd64} +OUTPUT_DIR=${OUTPUT_DIR:-$ROOT_DIR} +[ -d "${OUTPUT_DIR}" ] || mkdir -p "${OUTPUT_DIR}" +SANITIZER=${SANITIZER:-""} +SOURCE=${SOURCE:-$PKG_ROOT} + +HELP="${0} [--test] [--apk] [--rpm] [--tgz] [-h|--help] + --test - adds '+test' suffix to version + --apk - build APK packages + --rpm - build RPM packages + --tgz - build tarball package + --help - show this help and exit + +Used envs: + DEB_ARCH='${DEB_ARCH}' + OUTPUT_DIR='${OUTPUT_DIR}' - where the artifact will be placed + SANITIZER='${SANITIZER}' - if any sanitizer is used, affects version string + SOURCE='${SOURCE}' - directory with sources tree + VERSION_STRING='${VERSION_STRING}' - the package
version to overwrite +" + +if [ -z "${VERSION_STRING}" ]; then + # Get CLICKHOUSE_VERSION_STRING from the current git repo + eval "$("$ROOT_DIR/tests/ci/version_helper.py" -e)" +else + CLICKHOUSE_VERSION_STRING=${VERSION_STRING} +fi +export CLICKHOUSE_VERSION_STRING + + + +while [[ $1 == --* ]] +do + case "$1" in + --test ) + VERSION_POSTFIX+='+test' + shift ;; + --apk ) + MAKE_APK=1 + shift ;; + --rpm ) + MAKE_RPM=1 + shift ;; + --tgz ) + MAKE_TGZ=1 + shift ;; + --help ) + echo "$HELP" + exit ;; + * ) + echo "Unknown option $1" + exit 2 ;; + esac +done + +function deb2tgz { + local FILE PKG_NAME PKG_DIR PKG_PATH TARBALL + FILE=$1 + PKG_NAME=${FILE##*/}; PKG_NAME=${PKG_NAME%%_*} + PKG_DIR="$PKG_NAME-$CLICKHOUSE_VERSION_STRING" + PKG_PATH="$OUTPUT_DIR/$PKG_NAME-$CLICKHOUSE_VERSION_STRING" + TARBALL="$OUTPUT_DIR/$PKG_NAME-$CLICKHOUSE_VERSION_STRING-$DEB_ARCH.tgz" + rm -rf "$PKG_PATH" + dpkg-deb -R "$FILE" "$PKG_PATH" + mkdir -p "$PKG_PATH/install" + cat > "$PKG_PATH/install/doinst.sh" << 'EOF' +#!/bin/sh +set -e + +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +for filepath in `find $SCRIPTPATH/.. -type f -or -type l | grep -v "\.\./install/"`; do + destpath=${filepath##$SCRIPTPATH/..} + mkdir -p $(dirname "$destpath") + cp -r "$filepath" "$destpath" +done +EOF + chmod +x "$PKG_PATH/install/doinst.sh" + if [ -f "$PKG_PATH/DEBIAN/postinst" ]; then + tail +2 "$PKG_PATH/DEBIAN/postinst" > "$PKG_PATH/install/doinst.sh" + fi + rm -rf "$PKG_PATH/DEBIAN" + if [ -f "/usr/bin/pigz" ]; then + tar --use-compress-program=pigz -cf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR" + else + tar -czf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR" + fi + + rm -r "$PKG_PATH" +} + +# Build options +if [ -n "$SANITIZER" ]; then + if [[ "$SANITIZER" == "address" ]]; then VERSION_POSTFIX+="+asan" + elif [[ "$SANITIZER" == "thread" ]]; then VERSION_POSTFIX+="+tsan" + elif [[ "$SANITIZER" == "memory" ]]; then VERSION_POSTFIX+="+msan" + elif [[ "$SANITIZER" == "undefined" ]]; then VERSION_POSTFIX+="+ubsan" + else + echo "Unknown value of SANITIZER variable: $SANITIZER" + exit 3 + fi +elif [[ $BUILD_TYPE == 'debug' ]]; then + VERSION_POSTFIX+="+debug" +fi + +if [[ "$PKG_ROOT" != "$SOURCE" ]]; then + # packages are built only from the PKG_ROOT symlink + rm -rf "./$PKG_ROOT" + ln -sf "$SOURCE" "$PKG_ROOT" +fi + +CLICKHOUSE_VERSION_STRING+=$VERSION_POSTFIX +echo -e "\nCurrent version is $CLICKHOUSE_VERSION_STRING" + +for config in clickhouse*.yaml; do + echo "Building deb package for $config" + + # Preserve package path + exec 9>&1 + PKG_PATH=$(nfpm package --target "$OUTPUT_DIR" --config "$config" --packager deb | tee /dev/fd/9) + PKG_PATH=${PKG_PATH##*created package: } + exec 9>&- + + if [ -n "$MAKE_APK" ]; then + echo "Building apk package for $config" + nfpm package --target "$OUTPUT_DIR" --config "$config" --packager apk + fi + if [ -n "$MAKE_RPM" ]; then + echo "Building rpm package for $config" + nfpm package --target "$OUTPUT_DIR" --config "$config" --packager rpm + fi + if [ -n "$MAKE_TGZ" ]; then + echo "Building tarball for $config" + deb2tgz "$PKG_PATH" + fi +done + +# vim: ts=4: sw=4: sts=4: expandtab diff --git a/packages/clickhouse-client.yaml b/packages/clickhouse-client.yaml new file mode 100644 index 00000000000..2a1389b6625 --- /dev/null +++ b/packages/clickhouse-client.yaml @@ -0,0 +1,57 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-client" +arch: "all" +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc."
+homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" + +replaces: +- clickhouse-compressor +conflicts: +- clickhouse-compressor + +maintainer: "ClickHouse Dev Team <packages+os@clickhouse.com>" +description: | + Client binary for ClickHouse + ClickHouse is a column-oriented database management system + that allows generating analytical data reports in real time. + This package provides clickhouse-client, clickhouse-local and clickhouse-benchmark + +overrides: + deb: + depends: + - clickhouse-common-static (= ${CLICKHOUSE_VERSION_STRING}) + rpm: + depends: + - clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING} + +contents: +- src: root/etc/clickhouse-client/config.xml + dst: /etc/clickhouse-client/config.xml + type: config +- src: root/usr/bin/clickhouse-benchmark + dst: /usr/bin/clickhouse-benchmark +- src: root/usr/bin/clickhouse-compressor + dst: /usr/bin/clickhouse-compressor +- src: root/usr/bin/clickhouse-format + dst: /usr/bin/clickhouse-format +- src: root/usr/bin/clickhouse-client + dst: /usr/bin/clickhouse-client +- src: root/usr/bin/clickhouse-local + dst: /usr/bin/clickhouse-local +- src: root/usr/bin/clickhouse-obfuscator + dst: /usr/bin/clickhouse-obfuscator +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-client/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-client/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-client/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-client/README.md diff --git a/packages/clickhouse-common-static-dbg.yaml b/packages/clickhouse-common-static-dbg.yaml new file mode 100644 index 00000000000..12a1594bd30 --- /dev/null +++ b/packages/clickhouse-common-static-dbg.yaml @@ -0,0 +1,38 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-common-static-dbg" +arch: "${DEB_ARCH}" # amd64, arm64 +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc." +homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" + +replaces: +- clickhouse-common-dbg +conflicts: +- clickhouse-common-dbg + +maintainer: "ClickHouse Dev Team <packages+os@clickhouse.com>" +description: | + debugging symbols for clickhouse-common-static + This package contains the debugging symbols for clickhouse-common. + +contents: +- src: root/usr/lib/debug/usr/bin/clickhouse.debug + dst: /usr/lib/debug/usr/bin/clickhouse.debug +- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug + dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug +- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug + dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-common-static-dbg/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-common-static-dbg/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-common-static-dbg/README.md diff --git a/packages/clickhouse-common-static.yaml b/packages/clickhouse-common-static.yaml new file mode 100644 index 00000000000..269d4318e5e --- /dev/null +++ b/packages/clickhouse-common-static.yaml @@ -0,0 +1,48 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-common-static" +arch: "${DEB_ARCH}" # amd64, arm64 +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc."
+homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" + +replaces: +- clickhouse-common +- clickhouse-server-base +provides: +- clickhouse-common +- clickhouse-server-base +suggests: +- clickhouse-common-static-dbg + +maintainer: "ClickHouse Dev Team <packages+os@clickhouse.com>" +description: | + Common files for ClickHouse + ClickHouse is a column-oriented database management system + that allows generating analytical data reports in real time. + This package provides common files for both clickhouse server and client + +contents: +- src: root/usr/bin/clickhouse + dst: /usr/bin/clickhouse +- src: root/usr/bin/clickhouse-odbc-bridge + dst: /usr/bin/clickhouse-odbc-bridge +- src: root/usr/bin/clickhouse-library-bridge + dst: /usr/bin/clickhouse-library-bridge +- src: root/usr/bin/clickhouse-extract-from-config + dst: /usr/bin/clickhouse-extract-from-config +- src: root/usr/share/bash-completion/completions + dst: /usr/share/bash-completion/completions +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-common-static/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-common-static/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-common-static/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-common-static/README.md diff --git a/packages/clickhouse-keeper-dbg.yaml b/packages/clickhouse-keeper-dbg.yaml new file mode 100644 index 00000000000..2c70b7ad4aa --- /dev/null +++ b/packages/clickhouse-keeper-dbg.yaml @@ -0,0 +1,28 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-keeper-dbg" +arch: "${DEB_ARCH}" # amd64, arm64 +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc." +homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" +maintainer: "ClickHouse Dev Team <packages+os@clickhouse.com>" +description: | + debugging symbols for clickhouse-keeper + This package contains the debugging symbols for clickhouse-keeper. + +contents: +- src: root/usr/lib/debug/usr/bin/clickhouse-keeper.debug + dst: /usr/lib/debug/usr/bin/clickhouse-keeper.debug +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-keeper-dbg/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-keeper-dbg/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-keeper-dbg/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-keeper-dbg/README.md diff --git a/packages/clickhouse-keeper.yaml b/packages/clickhouse-keeper.yaml new file mode 100644 index 00000000000..e99ac30f944 --- /dev/null +++ b/packages/clickhouse-keeper.yaml @@ -0,0 +1,40 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-keeper" +arch: "${DEB_ARCH}" # amd64, arm64 +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc."
+homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" + +conflicts: +- clickhouse-server +depends: +- adduser +suggests: +- clickhouse-keeper-dbg + +maintainer: "ClickHouse Dev Team <packages+os@clickhouse.com>" +description: | + Static clickhouse-keeper binary + A stand-alone clickhouse-keeper package + + +contents: +- src: root/etc/clickhouse-keeper + dst: /etc/clickhouse-keeper + type: config +- src: root/usr/bin/clickhouse-keeper + dst: /usr/bin/clickhouse-keeper +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-keeper/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-keeper/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-keeper/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-keeper/README.md diff --git a/packages/clickhouse-server.init b/packages/clickhouse-server.init new file mode 100755 index 00000000000..1695f6286b8 --- /dev/null +++ b/packages/clickhouse-server.init @@ -0,0 +1,227 @@ +#!/bin/sh +### BEGIN INIT INFO +# Provides: clickhouse-server +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Should-Start: $time $network +# Should-Stop: $network +# Short-Description: clickhouse-server daemon +### END INIT INFO +# +# NOTES: +# - Should-* -- script can start if the listed facilities are missing, unlike Required-* +# +# For the documentation [1]: +# +# [1]: https://wiki.debian.org/LSBInitScripts + +CLICKHOUSE_USER=clickhouse +CLICKHOUSE_GROUP=${CLICKHOUSE_USER} +SHELL=/bin/bash +PROGRAM=clickhouse-server +CLICKHOUSE_GENERIC_PROGRAM=clickhouse +CLICKHOUSE_PROGRAM_ENV="" +EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config +CLICKHOUSE_CONFDIR=/etc/$PROGRAM +CLICKHOUSE_LOGDIR=/var/log/clickhouse-server +CLICKHOUSE_LOGDIR_USER=root +CLICKHOUSE_DATADIR=/var/lib/clickhouse +if [ -d "/var/lock" ]; then + LOCALSTATEDIR=/var/lock +else + LOCALSTATEDIR=/run/lock +fi + +if [ ! -d "$LOCALSTATEDIR" ]; then + mkdir -p "$LOCALSTATEDIR" +fi + +CLICKHOUSE_BINDIR=/usr/bin +CLICKHOUSE_CRONFILE=/etc/cron.d/clickhouse-server +CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml +LOCKFILE=$LOCALSTATEDIR/$PROGRAM +CLICKHOUSE_PIDDIR=/var/run/$PROGRAM +CLICKHOUSE_PIDFILE="$CLICKHOUSE_PIDDIR/$PROGRAM.pid" +# CLICKHOUSE_STOP_TIMEOUT=60 # Disabled by default. Place to /etc/default/clickhouse if you need. + +# Some systems lack "flock" +command -v flock >/dev/null && FLOCK=flock + +# Override defaults from optional config file +test -f /etc/default/clickhouse && . /etc/default/clickhouse + + +die() +{ + echo $1 >&2 + exit 1 +} + + +# Check that configuration file is Ok. +check_config() +{ + if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then + su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server.
You may use forcerestart if you are sure."; + fi +} + + +initdb() +{ + ${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" +} + + +start() +{ + ${CLICKHOUSE_GENERIC_PROGRAM} start --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" +} + + +stop() +{ + ${CLICKHOUSE_GENERIC_PROGRAM} stop --pid-path "${CLICKHOUSE_PIDDIR}" +} + + +restart() +{ + ${CLICKHOUSE_GENERIC_PROGRAM} restart --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" +} + + +forcestop() +{ + ${CLICKHOUSE_GENERIC_PROGRAM} stop --force --pid-path "${CLICKHOUSE_PIDDIR}" +} + + +service_or_func() +{ + if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then + systemctl $1 $PROGRAM + else + $1 + fi +} + +forcerestart() +{ + forcestop + # Should not use 'start' function if systemd active + service_or_func start +} + +use_cron() +{ + # 1. running systemd + if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then + return 1 + fi + # 2. disabled by config + if [ -z "$CLICKHOUSE_CRONFILE" ]; then + return 2 + fi + return 0 +} +# returns false if cron disabled (with systemd) +enable_cron() +{ + use_cron && sed -i 's/^#*//' "$CLICKHOUSE_CRONFILE" +} +# returns false if cron disabled (with systemd) +disable_cron() +{ + use_cron && sed -i 's/^#*/#/' "$CLICKHOUSE_CRONFILE" +} + + +is_cron_disabled() +{ + use_cron || return 0 + + # Assumes that either no lines are commented or all lines are commented. + # Also please note, that currently cron file for ClickHouse has only one line (but some time ago there was more). + grep -q -E '^#' "$CLICKHOUSE_CRONFILE"; +} + + +main() +{ + # See how we were called. + EXIT_STATUS=0 + case "$1" in + start) + service_or_func start && enable_cron + ;; + stop) + disable_cron + service_or_func stop + ;; + restart) + service_or_func restart && enable_cron + ;; + forcestop) + disable_cron + forcestop + ;; + forcerestart) + forcerestart && enable_cron + ;; + reload) + service_or_func restart + ;; + condstart) + service_or_func start + ;; + condstop) + service_or_func stop + ;; + condrestart) + service_or_func restart + ;; + condreload) + service_or_func restart + ;; + initdb) + initdb + ;; + enable_cron) + enable_cron + ;; + disable_cron) + disable_cron + ;; + *) + echo "Usage: $0 {start|stop|status|restart|forcestop|forcerestart|reload|condstart|condstop|condrestart|condreload|initdb}" + exit 2 + ;; + esac + + exit $EXIT_STATUS +} + + +status() +{ + ${CLICKHOUSE_GENERIC_PROGRAM} status --pid-path "${CLICKHOUSE_PIDDIR}" +} + + +# Running commands without need of locking +case "$1" in +status) + status + exit 0 + ;; +esac + + +( + if $FLOCK -n 9; then + main "$@" + else + echo "Init script is already running" && exit 1 + fi +) 9> $LOCKFILE diff --git a/packages/clickhouse-server.postinstall b/packages/clickhouse-server.postinstall new file mode 100644 index 00000000000..419c13e3daf --- /dev/null +++ b/packages/clickhouse-server.postinstall @@ -0,0 +1,47 @@ +#!/bin/sh +set -e +# set -x + +PROGRAM=clickhouse-server +CLICKHOUSE_USER=${CLICKHOUSE_USER:=clickhouse} +CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP:=${CLICKHOUSE_USER}} +# Please note that we don't support paths with whitespaces. This is rather ignorant. 
+CLICKHOUSE_CONFDIR=${CLICKHOUSE_CONFDIR:=/etc/clickhouse-server} +CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR:=/var/lib/clickhouse} +CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR:=/var/log/clickhouse-server} +CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin} +CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse} +EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config +CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml +CLICKHOUSE_PIDDIR=/var/run/$PROGRAM + +[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule +[ -f /etc/default/clickhouse ] && . /etc/default/clickhouse + +if [ ! -f "/etc/debian_version" ]; then + not_deb_os=1 +fi + +if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then + + ${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}" + + if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then + # if old rc.d service present - remove it + if [ -x "/etc/init.d/clickhouse-server" ] && [ -x "/usr/sbin/update-rc.d" ]; then + /usr/sbin/update-rc.d clickhouse-server remove + fi + + /bin/systemctl daemon-reload + /bin/systemctl enable clickhouse-server + else + # If you are downgrading to a version older than 1.1.54336 run: systemctl disable clickhouse-server + if [ -x "/etc/init.d/clickhouse-server" ]; then + if [ -x "/usr/sbin/update-rc.d" ]; then + /usr/sbin/update-rc.d clickhouse-server defaults 19 19 >/dev/null || exit $? + else + echo # Other OS + fi + fi + fi +fi diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service new file mode 100644 index 00000000000..a9400b24270 --- /dev/null +++ b/packages/clickhouse-server.service @@ -0,0 +1,27 @@ +[Unit] +Description=ClickHouse Server (analytic DBMS for big data) +Requires=network-online.target +# NOTE: After/Wants=time-sync.target is not enough, you need to ensure +# that the time was adjusted already, if you use systemd-timesyncd you are +# safe, but if you use ntp or some other daemon, you should configure it +# additionally. +After=time-sync.target network-online.target +Wants=time-sync.target + +[Service] +Type=simple +User=clickhouse +Group=clickhouse +Restart=always +RestartSec=30 +RuntimeDirectory=clickhouse-server +ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid +# Minus means that this file is optional. +EnvironmentFile=-/etc/default/clickhouse +LimitCORE=infinity +LimitNOFILE=500000 +CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE + +[Install] +# ClickHouse should not start from the rescue shell (rescue.target). +WantedBy=multi-user.target diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml new file mode 100644 index 00000000000..ed56eb27e54 --- /dev/null +++ b/packages/clickhouse-server.yaml @@ -0,0 +1,68 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-server" +arch: "all" +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc."
+homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" + +conflicts: +- clickhouse-keeper +depends: +- adduser +replaces: +- clickhouse-server-common +- clickhouse-server-base +provides: +- clickhouse-server-common +recommends: +- libcap2-bin + +maintainer: "ClickHouse Dev Team <packages+os@clickhouse.com>" +description: | + Server binary for ClickHouse + ClickHouse is a column-oriented database management system + that allows generating analytical data reports in real time. + This package provides clickhouse common configuration files + +overrides: + deb: + depends: + - clickhouse-common-static (= ${CLICKHOUSE_VERSION_STRING}) + rpm: + depends: + - clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING} + +contents: +- src: root/etc/clickhouse-server + dst: /etc/clickhouse-server + type: config +- src: clickhouse-server.init + dst: /etc/init.d/clickhouse-server +- src: clickhouse-server.service + dst: /lib/systemd/system/clickhouse-server.service +- src: root/usr/bin/clickhouse-copier + dst: /usr/bin/clickhouse-copier +- src: clickhouse + dst: /usr/bin/clickhouse-keeper + type: symlink +- src: root/usr/bin/clickhouse-report + dst: /usr/bin/clickhouse-report +- src: root/usr/bin/clickhouse-server + dst: /usr/bin/clickhouse-server +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-server/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-server/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-server/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-server/README.md + +scripts: + postinstall: ./clickhouse-server.postinstall diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 0890b9c95d3..cca7be97b61 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -460,10 +460,6 @@ else () list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter) endif () - if (NOT BUILD_STRIPPED_BINARIES_PREFIX) - install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - endif() - add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE}) if (USE_GDB_ADD_INDEX) @@ -474,13 +470,14 @@ else () add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM) endif() - if (BUILD_STRIPPED_BINARIES_PREFIX) - clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH clickhouse) + if (INSTALL_STRIPPED_BINARIES) + clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse) + else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT}) + install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + endif() endif() - - if (ENABLE_TESTS) set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms) add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS}) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index f3e7f1775b8..c2094b3b00d 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -820,6 +820,7 @@ void Client::addOptions(OptionsDescription & options_description) ("opentelemetry-tracestate", po::value<std::string>(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation") ("no-warnings", "disable
warnings when client connects to server") + ("fake-drop", "Ignore all DROP queries, should be used only for testing") ; /// Commandline options related to external tables. @@ -952,6 +953,8 @@ void Client::processOptions(const OptionsDescription & options_description, config().setBool("compression", options["compression"].as<bool>()); if (options.count("no-warnings")) config().setBool("no-warnings", true); + if (options.count("fake-drop")) + fake_drop = true; if ((query_fuzzer_runs = options["query-fuzzer-runs"].as<int>())) { diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index f8df823ecb7..5dec09ea901 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -792,9 +792,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv) fmt::print("Setting capabilities for clickhouse binary. This is optional.\n"); std::string command = fmt::format("command -v setcap >/dev/null" " && command -v capsh >/dev/null" - " && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice+ep >/dev/null 2>&1" - " && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {0}" - " || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary." + " && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice,cap_net_bind_service+ep >/dev/null 2>&1" + " && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice,cap_net_bind_service+ep' {0}" + " || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' or 'net_bind_service' capability for clickhouse binary." " This is optional. Taskstats accounting will be disabled." " To enable taskstats accounting you may add the required capability later manually.\"", fs::canonical(main_bin_path).string()); diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 92bb5dc45a3..b82b13d9607 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -71,17 +71,11 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDelta.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDoubleDelta.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecGorilla.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecT64.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/getCompressionCodecForFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp @@ -137,5 +131,10 @@ if (BUILD_STANDALONE_KEEPER) add_dependencies(clickhouse-keeper clickhouse_keeper_configs) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) - install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + if (INSTALL_STRIPPED_BINARIES) + clickhouse_strip_binary(TARGET clickhouse-keeper
diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp
index f8df823ecb7..5dec09ea901 100644
--- a/programs/install/Install.cpp
+++ b/programs/install/Install.cpp
@@ -792,9 +792,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
        fmt::print("Setting capabilities for clickhouse binary. This is optional.\n");
        std::string command = fmt::format("command -v setcap >/dev/null"
            " && command -v capsh >/dev/null"
-            " && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice+ep >/dev/null 2>&1"
-            " && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {0}"
-            " || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary."
+            " && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice,cap_net_bind_service+ep >/dev/null 2>&1"
+            " && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice,cap_net_bind_service+ep' {0}"
+            " || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' or 'net_bind_service' capability for clickhouse binary."
            " This is optional. Taskstats accounting will be disabled."
            " To enable taskstats accounting you may add the required capability later manually.\"",
            fs::canonical(main_bin_path).string());
diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt
index 92bb5dc45a3..b82b13d9607 100644
--- a/programs/keeper/CMakeLists.txt
+++ b/programs/keeper/CMakeLists.txt
@@ -71,17 +71,11 @@ if (BUILD_STANDALONE_KEEPER)
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDelta.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDoubleDelta.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecGorilla.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecT64.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/getCompressionCodecForFile.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp
@@ -137,5 +131,10 @@ if (BUILD_STANDALONE_KEEPER)
    add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
    set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)

-    install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
+    if (INSTALL_STRIPPED_BINARIES)
+        clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper)
+    else()
+        clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
+        install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
+    endif()
endif()
diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt
index d7e104685c5..90ce3d8be7f 100644
--- a/programs/library-bridge/CMakeLists.txt
+++ b/programs/library-bridge/CMakeLists.txt
@@ -24,10 +24,9 @@ target_link_libraries(clickhouse-library-bridge PRIVATE

set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)

-if (BUILD_STRIPPED_BINARIES_PREFIX)
-    clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH ../clickhouse-library-bridge)
-endif()
-
-if (NOT BUILD_STRIPPED_BINARIES_PREFIX)
+if (INSTALL_STRIPPED_BINARIES)
+    clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge)
+else()
+    clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
    install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index 26d42a11315..bb6684ca137 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -184,6 +184,11 @@ void LocalServer::tryInitPath()
    if (path.back() != '/')
        path += '/';

+    fs::create_directories(fs::path(path) / "user_defined/");
+    fs::create_directories(fs::path(path) / "data/");
+    fs::create_directories(fs::path(path) / "metadata/");
+    fs::create_directories(fs::path(path) / "metadata_dropped/");
+
    global_context->setPath(path);

    global_context->setTemporaryStorage(path + "tmp");
@@ -565,7 +570,6 @@ void LocalServer::processConfig()
        /// Lock path directory before read
        status.emplace(fs::path(path) / "status", StatusFile::write_full_info);

-        fs::create_directories(fs::path(path) / "user_defined/");
        LOG_DEBUG(log, "Loading user defined objects from {}", path);
        Poco::File(path + "user_defined/").createDirectories();
        UserDefinedSQLObjectsLoader::instance().loadObjects(global_context);
@@ -573,9 +577,6 @@ void LocalServer::processConfig()
        LOG_DEBUG(log, "Loaded user defined objects.");

        LOG_DEBUG(log, "Loading metadata from {}", path);
-        fs::create_directories(fs::path(path) / "data/");
-        fs::create_directories(fs::path(path) / "metadata/");
-
        loadMetadataSystem(global_context);
        attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
        attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
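
> Reviewer note: hoisting the `fs::create_directories` calls into `tryInitPath` is safe because `std::filesystem::create_directories` is idempotent — it creates missing parents and is a no-op when the directory already exists. A small standalone sketch demonstrating that behavior (the paths are made up for the demo):

```cpp
#include <filesystem>
#include <iostream>

namespace fs = std::filesystem;

int main()
{
    fs::path path = "/tmp/clickhouse-local-demo/";

    // create_directories creates all missing parents and returns false
    // (without error) when the directory already exists, so calling it
    // unconditionally at startup is safe.
    for (const char * sub : {"user_defined/", "data/", "metadata/", "metadata_dropped/"})
    {
        bool created = fs::create_directories(path / sub);
        std::cout << sub << (created ? " created\n" : " already existed\n");
    }
}
```
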
diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt
index 44493d7ab8a..b530e08ca26 100644
--- a/programs/odbc-bridge/CMakeLists.txt
+++ b/programs/odbc-bridge/CMakeLists.txt
@@ -39,11 +39,10 @@ if (USE_GDB_ADD_INDEX)
    add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM)
endif()

-if (BUILD_STRIPPED_BINARIES_PREFIX)
-    clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH ../clickhouse-odbc-bridge)
-endif()
-
-if (NOT BUILD_STRIPPED_BINARIES_PREFIX)
+if (INSTALL_STRIPPED_BINARIES)
+    clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge)
+else()
+    clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
    install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 1b11453dde4..c12abda9594 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -45,6 +46,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -79,6 +82,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -504,6 +508,101 @@ void checkForUsersNotInMainConfig(
    }
}

+/// Unused in other builds
+#if defined(OS_LINUX)
+static String readString(const String & path)
+{
+    ReadBufferFromFile in(path);
+    String contents;
+    readStringUntilEOF(contents, in);
+    return contents;
+}
+
+static int readNumber(const String & path)
+{
+    ReadBufferFromFile in(path);
+    int result;
+    readText(result, in);
+    return result;
+}
+
+#endif
+
+static void sanityChecks(Server * server)
+{
+    std::string data_path = getCanonicalPath(server->config().getString("path", DBMS_DEFAULT_PATH));
+    std::string logs_path = server->config().getString("logger.log", "");
+
+#if defined(OS_LINUX)
+    try
+    {
+        if (readString("/sys/devices/system/clocksource/clocksource0/current_clocksource").find("tsc") == std::string::npos)
+            server->context()->addWarningMessage("Linux is not using a fast TSC clock source. Performance can be degraded.");
+    }
+    catch (...)
+    {
+    }
+
+    try
+    {
+        if (readNumber("/proc/sys/vm/overcommit_memory") == 2)
+            server->context()->addWarningMessage("Linux memory overcommit is disabled.");
+    }
+    catch (...)
+    {
+    }
+
+    try
+    {
+        if (readString("/sys/kernel/mm/transparent_hugepage/enabled").find("[always]") != std::string::npos)
+            server->context()->addWarningMessage("Linux transparent hugepages are set to \"always\".");
+    }
+    catch (...)
+    {
+    }
+
+    try
+    {
+        if (readNumber("/proc/sys/kernel/pid_max") < 30000)
+            server->context()->addWarningMessage("Linux max PID is too low.");
+    }
+    catch (...)
+    {
+    }
+
+    try
+    {
+        if (readNumber("/proc/sys/kernel/threads-max") < 30000)
+            server->context()->addWarningMessage("Linux threads max count is too low.");
+    }
+    catch (...)
+    {
+    }
+
+    std::string dev_id = getBlockDeviceId(data_path);
+    if (getBlockDeviceType(dev_id) == BlockDeviceType::ROT && getBlockDeviceReadAheadBytes(dev_id) == 0)
+        server->context()->addWarningMessage("Rotational disk with disabled readahead is in use. Performance can be degraded.");
+#endif
+
+    try
+    {
+        if (getAvailableMemoryAmount() < (2l << 30))
+            server->context()->addWarningMessage("Available memory at server startup is too low (less than 2GiB).");
+
+        if (!enoughSpaceInDirectory(data_path, 1ull << 30))
+            server->context()->addWarningMessage("Available disk space at server startup is too low (less than 1GiB).");
+
+        if (!logs_path.empty())
+        {
+            if (!enoughSpaceInDirectory(fs::path(logs_path).parent_path(), 1ull << 30))
+                server->context()->addWarningMessage("Available disk space at server startup is too low (less than 1GiB).");
+        }
+    }
+    catch (...)
+    {
+    }
+}
+
 int Server::main(const std::vector<std::string> & /*args*/)
 {
     Poco::Logger * log = &logger();
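
> Reviewer note: every probe in `sanityChecks` follows the same pattern — read one kernel tunable, emit a warning if it looks bad, and swallow any error, so a missing or unreadable file (containers, non-Linux builds) can never block startup. A standalone sketch of that pattern, using `std::ifstream` in place of ClickHouse's `ReadBufferFromFile`:

```cpp
#include <fstream>
#include <iostream>
#include <string>

// Each probe is wrapped in try/catch because these files may be absent and a
// failed sanity check must never prevent the server from starting.
static int readNumber(const std::string & path)
{
    std::ifstream in(path);
    // Turn stream failures into exceptions so the caller's catch(...) sees them.
    in.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    int result = 0;
    in >> result;
    return result;
}

int main()
{
    try
    {
        if (readNumber("/proc/sys/kernel/pid_max") < 30000)
            std::cout << "warning: Linux max PID is too low.\n";
    }
    catch (...)
    {
        // Best effort only: ignore unreadable or missing files.
    }
}
```
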
@@ -537,13 +636,14 @@ int Server::main(const std::vector<std::string> & /*args*/)
    global_context->addWarningMessage("Server was built in debug mode. It will work slowly.");
#endif

-if (ThreadFuzzer::instance().isEffective())
-    global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable.");
+    if (ThreadFuzzer::instance().isEffective())
+        global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and be unstable.");

#if defined(SANITIZER)
    global_context->addWarningMessage("Server was built with sanitizer. It will work slowly.");
#endif

+    sanityChecks(this);

    // Initialize global thread pool. Do it before we fetch configs from zookeeper
    // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
@@ -554,6 +654,10 @@
        config().getUInt("thread_pool_queue_size", 10000)
    );

+    IOThreadPool::initialize(
+        config().getUInt("max_io_thread_pool_size", 100),
+        config().getUInt("max_io_thread_pool_free_size", 0),
+        config().getUInt("io_thread_pool_queue_size", 10000));

    /// Initialize global local cache for remote filesystem.
    if (config().has("local_cache_for_remote_fs"))
@@ -761,6 +865,38 @@ if (ThreadFuzzer::instance().isEffective())
        }
    }

+    /// Try to increase limit on number of threads.
+    {
+        rlimit rlim;
+        if (getrlimit(RLIMIT_NPROC, &rlim))
+            throw Poco::Exception("Cannot getrlimit");
+
+        if (rlim.rlim_cur == rlim.rlim_max)
+        {
+            LOG_DEBUG(log, "rlimit on number of threads is {}", rlim.rlim_cur);
+        }
+        else
+        {
+            rlim_t old = rlim.rlim_cur;
+            rlim.rlim_cur = rlim.rlim_max;
+            int rc = setrlimit(RLIMIT_NPROC, &rlim);
+            if (rc != 0)
+            {
+                LOG_WARNING(log, "Cannot set max number of threads to {}. error: {}", rlim.rlim_cur, strerror(errno));
+                rlim.rlim_cur = old;
+            }
+            else
+            {
+                LOG_DEBUG(log, "Set max number of threads to {} (was {}).", rlim.rlim_cur, old);
+            }
+        }
+
+        if (rlim.rlim_cur < 30000)
+        {
+            global_context->addWarningMessage("Maximum number of threads is lower than 30000. There could be problems with handling a lot of simultaneous queries.");
+        }
+    }
+
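
> Reviewer note: the new rlimit block relies on the POSIX rule that an unprivileged process may always raise its soft limit (`rlim_cur`) up to its hard limit (`rlim_max`). A minimal standalone version of the same dance:

```cpp
#include <sys/resource.h>
#include <cerrno>
#include <cstdio>
#include <cstring>

// Raise the soft limit on threads/processes to the hard limit; this needs no
// privileges because rlim_cur may always be raised up to rlim_max.
int main()
{
    rlimit rlim{};
    if (getrlimit(RLIMIT_NPROC, &rlim) != 0)
    {
        std::perror("getrlimit");
        return 1;
    }

    if (rlim.rlim_cur < rlim.rlim_max)
    {
        rlimit desired = rlim;
        desired.rlim_cur = desired.rlim_max;
        if (setrlimit(RLIMIT_NPROC, &desired) != 0)
            std::fprintf(stderr, "cannot raise RLIMIT_NPROC: %s\n", std::strerror(errno));
        else
            std::printf("raised thread limit from %llu to %llu\n",
                        (unsigned long long) rlim.rlim_cur,
                        (unsigned long long) desired.rlim_cur);
    }
    return 0;
}
```
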
    static ServerErrorHandler error_handler;
    Poco::ErrorHandler::set(&error_handler);

@@ -824,6 +960,36 @@ if (ThreadFuzzer::instance().isEffective())
            fs::create_directories(path / "metadata_dropped/");
        }

+#if USE_ROCKSDB
+    /// Initialize merge tree metadata cache
+    if (config().has("merge_tree_metadata_cache"))
+    {
+        fs::create_directories(path / "rocksdb/");
+        size_t size = config().getUInt64("merge_tree_metadata_cache.lru_cache_size", 256 << 20);
+        bool continue_if_corrupted = config().getBool("merge_tree_metadata_cache.continue_if_corrupted", false);
+        try
+        {
+            LOG_DEBUG(
+                log, "Initializing merge tree metadata cache lru_cache_size:{} continue_if_corrupted:{}", size, continue_if_corrupted);
+            global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size);
+        }
+        catch (...)
+        {
+            if (continue_if_corrupted)
+            {
+                /// Rename rocksdb directory and reinitialize merge tree metadata cache
+                time_t now = time(nullptr);
+                fs::rename(path / "rocksdb", path / ("rocksdb.old." + std::to_string(now)));
+                global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size);
+            }
+            else
+            {
+                throw;
+            }
+        }
+    }
+#endif
+
    if (config().has("interserver_http_port") && config().has("interserver_https_port"))
        throw Exception("Both http and https interserver ports are specified", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
@@ -1022,8 +1188,8 @@
                std::make_unique(
                    new KeeperTCPHandlerFactory(
                        config_getter, global_context->getKeeperDispatcher(),
-                        global_context->getSettingsRef().receive_timeout,
-                        global_context->getSettingsRef().send_timeout,
+                        global_context->getSettingsRef().receive_timeout.totalSeconds(),
+                        global_context->getSettingsRef().send_timeout.totalSeconds(),
                        false), server_pool, socket));
        });
@@ -1045,8 +1211,8 @@
                    std::make_unique(
                        new KeeperTCPHandlerFactory(
                            config_getter, global_context->getKeeperDispatcher(),
-                            global_context->getSettingsRef().receive_timeout,
-                            global_context->getSettingsRef().send_timeout, true), server_pool, socket));
+                            global_context->getSettingsRef().receive_timeout.totalSeconds(),
+                            global_context->getSettingsRef().send_timeout.totalSeconds(), true), server_pool, socket));
#else
                UNUSED(port);
                throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
diff --git a/programs/server/config.xml b/programs/server/config.xml
index 6ca64dc30c5..1de379b0b2a 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -1294,4 +1294,10 @@
    -->
+
+    <!-- Uncomment to enable the merge tree metadata cache -->
+    <!--merge_tree_metadata_cache>
+        <lru_cache_size>268435456</lru_cache_size>
+        <continue_if_corrupted>true</continue_if_corrupted>
+    </merge_tree_metadata_cache-->
</clickhouse>
diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h
index 0b69bd5fd0e..fb5eafbe679 100644
--- a/src/Access/Common/AccessType.h
+++ b/src/Access/Common/AccessType.h
@@ -182,6 +182,7 @@ enum class AccessType
    M(JDBC, "", GLOBAL, SOURCES) \
    M(HDFS, "", GLOBAL, SOURCES) \
    M(S3, "", GLOBAL, SOURCES) \
+    M(HIVE, "", GLOBAL, SOURCES) \
    M(SOURCES, "", GROUP, ALL) \
    \
    M(ALL, "ALL PRIVILEGES", GROUP, NONE) /* full access */ \
diff --git a/src/Access/Common/QuotaDefs.h b/src/Access/Common/QuotaDefs.h
index cfd8a07d9ff..7a69f811ea5 100644
--- a/src/Access/Common/QuotaDefs.h
+++ b/src/Access/Common/QuotaDefs.h
@@ -13,7 +13,7 @@ enum class QuotaType
{
    QUERIES,        /// Number of queries.
    QUERY_SELECTS,  /// Number of select queries.
-    QUERY_INSERTS,  /// Number of inserts queries.
+    QUERY_INSERTS,  /// Number of insert queries.
    ERRORS,         /// Number of queries with exceptions.
    RESULT_ROWS,    /// Number of rows returned as result.
    RESULT_BYTES,   /// Number of bytes returned as result.
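
> Reviewer note: `continue_if_corrupted` implements a quarantine-and-retry recovery — the suspect RocksDB directory is renamed aside with a timestamp and the cache is reinitialized from scratch, rather than deleting data or refusing to start. A generic sketch of that strategy; `open_cache` is a hypothetical stand-in for `initializeMergeTreeMetadataCache`:

```cpp
#include <ctime>
#include <filesystem>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>

namespace fs = std::filesystem;

// If opening fails (e.g. corrupted on-disk state), rename the suspect
// directory to a timestamped ".old" name, recreate it, and retry once.
void openOrRecover(const fs::path & dir, const std::function<void(const fs::path &)> & open_cache)
{
    fs::create_directories(dir);
    try
    {
        open_cache(dir);
    }
    catch (...)
    {
        fs::path quarantined = dir;
        quarantined += ".old." + std::to_string(std::time(nullptr));
        fs::rename(dir, quarantined);
        fs::create_directories(dir);
        open_cache(dir); // If this throws too, propagate: the state is not recoverable.
    }
}

int main()
{
    bool first = true;
    // Toy cache opener that fails on the first attempt to simulate corruption.
    auto open_cache = [&](const fs::path &)
    {
        if (first) { first = false; throw std::runtime_error("corrupted"); }
    };
    openOrRecover("/tmp/demo_rocksdb", open_cache);
    std::cout << "cache opened after recovery\n";
}
```
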
diff --git a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp
new file mode 100644
index 00000000000..50d5f075322
--- /dev/null
+++ b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp
@@ -0,0 +1,147 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+static inline constexpr UInt64 GROUP_SORTED_ARRAY_MAX_SIZE = 0xFFFFFF;
+static inline constexpr UInt64 GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD = 10;
+
+
+namespace DB
+{
+struct Settings;
+
+namespace ErrorCodes
+{
+    extern const int ARGUMENT_OUT_OF_BOUND;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+
+namespace
+{
+    template
+    class AggregateFunctionGroupArraySortedNumeric : public AggregateFunctionGroupArraySorted
+    {
+        using AggregateFunctionGroupArraySorted::AggregateFunctionGroupArraySorted;
+    };
+
+    template
+    class AggregateFunctionGroupArraySortedFieldType
+        : public AggregateFunctionGroupArraySorted
+    {
+        using AggregateFunctionGroupArraySorted::
+            AggregateFunctionGroupArraySorted;
+        DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); }
+    };
+
+    template
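
> Reviewer note: the new `groupArraySorted` machinery ultimately maintains at most `threshold` smallest values per aggregation state, in sorted order. A toy sketch of just that insertion logic, independent of ClickHouse's column and serialization plumbing (this is not the actual implementation, which is templated over column types):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// Keep at most `threshold` smallest values seen so far, always sorted.
template <typename T>
class BoundedSortedArray
{
public:
    explicit BoundedSortedArray(size_t threshold_) : threshold(threshold_) {}

    void insert(const T & value)
    {
        // Full and not better than the current worst kept value: skip.
        if (values.size() == threshold && value >= values.back())
            return;
        auto pos = std::upper_bound(values.begin(), values.end(), value);
        values.insert(pos, value);
        if (values.size() > threshold)
            values.pop_back();
    }

    const std::vector<T> & result() const { return values; }

private:
    size_t threshold;
    std::vector<T> values; // Always sorted ascending, size <= threshold.
};

int main()
{
    BoundedSortedArray<int> agg(3);
    for (int x : {5, 1, 9, 2, 7, 0})
        agg.insert(x);
    for (int x : agg.result())
        std::printf("%d ", x); // Prints: 0 1 2
    std::printf("\n");
}
```
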