Merge branch 'master' into MDB-16962

Commit: 32c6390400
@@ -33,6 +33,8 @@ Checks: '-*,
    performance-no-automatic-move,
    performance-trivially-destructible,
    performance-unnecessary-copy-initialization,
    performance-noexcept-move-constructor,
    performance-move-const-arg,

    readability-avoid-const-params-in-decls,
    readability-const-return-type,
@@ -206,3 +208,5 @@ CheckOptions:
    value: CamelCase
  - key: modernize-loop-convert.UseCxx20ReverseRanges
    value: false
  - key: performance-move-const-arg.CheckTriviallyCopyableMove
    value: false
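As a hedged illustration only (not part of this commit), the following minimal C++ snippet shows the kind of code that `performance-move-const-arg` reports when `CheckTriviallyCopyableMove` is left at its default, and that the `false` setting above tolerates: a `std::move` applied to a trivially copyable type, which cannot actually be moved and is copied anyway. The second option, `modernize-loop-convert.UseCxx20ReverseRanges`, simply stops the loop-convert check from suggesting C++20 reverse ranges.

```cpp
#include <string>
#include <utility>

// Illustration: moving a trivially copyable type (int) is a no-op copy, which
// performance-move-const-arg flags unless CheckTriviallyCopyableMove is false.
int takeInt(int x) { return x; }
std::string takeString(std::string s) { return s; }

int main()
{
    int a = 42;
    int b = takeInt(std::move(a));            // std::move is pointless here: int is trivially copyable

    std::string s = "hello";
    std::string t = takeString(std::move(s)); // a genuine move; unaffected by the option

    return (b == 42 && t == "hello") ? 0 : 1;
}
```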
.gitattributes (vendored): 1 changed line
@@ -1,2 +1,3 @@
contrib/* linguist-vendored
*.h linguist-language=C++
tests/queries/0_stateless/data_json/* binary
.github/ISSUE_TEMPLATE/10_question.md (vendored): 2 changed lines
@@ -7,6 +7,6 @@ assignees: ''

---

> Make sure to check documentation https://clickhouse.yandex/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse

> If you still prefer GitHub issues, remove all this text and ask your question here.
.github/ISSUE_TEMPLATE/50_build-issue.md (vendored): 2 changed lines
@@ -7,7 +7,7 @@ assignees: ''

---

> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.yandex/docs/en/development/build/
> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.com/docs/en/development/build/

**Operating system**

.github/codecov.yml (vendored): 17 changed lines
@@ -1,17 +0,0 @@
codecov:
  max_report_age: "off"
  strict_yaml_branch: "master"

ignore:
  - "contrib"
  - "docs"
  - "benchmark"
  - "tests"
  - "docker"
  - "debian"
  - "cmake"

comment: false

github_checks:
  annotations: false
.github/workflows/anchore-analysis.yml (vendored): 43 changed lines
@@ -1,43 +0,0 @@
# This workflow checks out code, performs an Anchore container image
# vulnerability and compliance scan, and integrates the results with
# GitHub Advanced Security code scanning feature. For more information on
# the Anchore scan action usage and parameters, see
# https://github.com/anchore/scan-action. For more information on
# Anchore container image scanning in general, see
# https://docs.anchore.com.

name: Docker Container Scan (clickhouse-server)

env:
  # Force the stdout and stderr streams to be unbuffered
  PYTHONUNBUFFERED: 1

"on":
  pull_request:
    paths:
      - docker/server/Dockerfile
      - .github/workflows/anchore-analysis.yml
  schedule:
    - cron: '0 21 * * *'

jobs:
  Anchore-Build-Scan:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout the code
        uses: actions/checkout@v2
      - name: Build the Docker image
        run: |
          cd docker/server
          perl -pi -e 's|=\$version||g' Dockerfile
          docker build . --file Dockerfile --tag localbuild/testimage:latest
      - name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled
        uses: anchore/scan-action@v2
        id: scan
        with:
          image: "localbuild/testimage:latest"
          acs-report-enable: true
      - name: Upload Anchore Scan Report
        uses: github/codeql-action/upload-sarif@v1
        with:
          sarif_file: ${{ steps.scan.outputs.sarif }}
.github/workflows/pull_request.yml (vendored): 45 changed lines
@@ -1733,6 +1733,51 @@ jobs:
          docker kill "$(docker ps -q)" ||:
          docker rm -f "$(docker ps -a -q)" ||:
          sudo rm -fr "$TEMP_PATH"
  TestsBugfixCheck:
    runs-on: [self-hosted, stress-tester]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/tests_bugfix_check
          REPORTS_PATH=${{runner.temp}}/reports_dir
          CHECK_NAME=Tests bugfix validate check (actions)
          KILL_TIMEOUT=3600
          REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse
          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Bugfix test
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"

          TEMP_PATH="${TEMP_PATH}/integration" \
          REPORTS_PATH="${REPORTS_PATH}/integration" \
            python3 integration_test_check.py "Integration tests bugfix validate check" \
            --validate-bugfix --post-commit-status=file || echo 'ignore exit code'

          TEMP_PATH="${TEMP_PATH}/stateless" \
          REPORTS_PATH="${REPORTS_PATH}/stateless" \
            python3 functional_test_check.py "Stateless tests bugfix validate check" "$KILL_TIMEOUT" \
            --validate-bugfix --post-commit-status=file || echo 'ignore exit code'

          python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/post_commit_status.tsv" "${TEMP_PATH}/integration/post_commit_status.tsv"
      - name: Cleanup
        if: always()
        run: |
          docker kill "$(docker ps -q)" ||:
          docker rm -f "$(docker ps -a -q)" ||:
          sudo rm -fr "$TEMP_PATH"
##############################################################################################
############################ FUNCTIONAl STATEFUL TESTS #######################################
##############################################################################################
.github/workflows/release.yml (vendored): 2 changed lines
@@ -32,7 +32,7 @@ jobs:
        uses: svenstaro/upload-release-action@v2
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ${{runner.temp}}/release_packages/*
          file: ${{runner.temp}}/push_to_artifactory/*
          overwrite: true
          tag: ${{ github.ref }}
          file_glob: true
.github/workflows/tags_stable.yml (vendored): 2 changed lines
@@ -27,6 +27,8 @@ jobs:
      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v3
        with:
          author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
          committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
          commit-message: Update version_date.tsv after ${{ env.GITHUB_TAG }}
          branch: auto/${{ env.GITHUB_TAG }}
          delete-branch: true
.gitmodules (vendored): 116 changed lines
@@ -1,6 +1,6 @@
[submodule "contrib/poco"]
path = contrib/poco
url = https://github.com/ClickHouse-Extras/poco.git
url = https://github.com/ClickHouse/poco.git
branch = clickhouse
[submodule "contrib/zstd"]
path = contrib/zstd
@@ -10,13 +10,13 @@
url = https://github.com/lz4/lz4.git
[submodule "contrib/librdkafka"]
path = contrib/librdkafka
url = https://github.com/ClickHouse-Extras/librdkafka.git
url = https://github.com/ClickHouse/librdkafka.git
[submodule "contrib/cctz"]
path = contrib/cctz
url = https://github.com/ClickHouse-Extras/cctz.git
url = https://github.com/ClickHouse/cctz.git
[submodule "contrib/zlib-ng"]
path = contrib/zlib-ng
url = https://github.com/ClickHouse-Extras/zlib-ng.git
url = https://github.com/ClickHouse/zlib-ng.git
branch = clickhouse-2.0.x
[submodule "contrib/googletest"]
path = contrib/googletest
@@ -32,51 +32,51 @@
url = https://github.com/google/re2.git
[submodule "contrib/llvm"]
path = contrib/llvm
url = https://github.com/ClickHouse-Extras/llvm
url = https://github.com/ClickHouse/llvm
[submodule "contrib/mariadb-connector-c"]
path = contrib/mariadb-connector-c
url = https://github.com/ClickHouse-Extras/mariadb-connector-c.git
url = https://github.com/ClickHouse/mariadb-connector-c.git
[submodule "contrib/jemalloc"]
path = contrib/jemalloc
url = https://github.com/jemalloc/jemalloc.git
[submodule "contrib/unixodbc"]
path = contrib/unixodbc
url = https://github.com/ClickHouse-Extras/UnixODBC.git
url = https://github.com/ClickHouse/UnixODBC.git
[submodule "contrib/protobuf"]
path = contrib/protobuf
url = https://github.com/ClickHouse-Extras/protobuf.git
url = https://github.com/ClickHouse/protobuf.git
branch = v3.13.0.1
[submodule "contrib/boost"]
path = contrib/boost
url = https://github.com/ClickHouse-Extras/boost.git
url = https://github.com/ClickHouse/boost.git
[submodule "contrib/base64"]
path = contrib/base64
url = https://github.com/ClickHouse-Extras/Turbo-Base64.git
url = https://github.com/ClickHouse/Turbo-Base64.git
[submodule "contrib/arrow"]
path = contrib/arrow
url = https://github.com/ClickHouse-Extras/arrow.git
url = https://github.com/ClickHouse/arrow.git
branch = blessed/release-6.0.1
[submodule "contrib/thrift"]
path = contrib/thrift
url = https://github.com/apache/thrift.git
[submodule "contrib/libhdfs3"]
path = contrib/libhdfs3
url = https://github.com/ClickHouse-Extras/libhdfs3.git
url = https://github.com/ClickHouse/libhdfs3.git
[submodule "contrib/libxml2"]
path = contrib/libxml2
url = https://github.com/GNOME/libxml2.git
[submodule "contrib/libgsasl"]
path = contrib/libgsasl
url = https://github.com/ClickHouse-Extras/libgsasl.git
url = https://github.com/ClickHouse/libgsasl.git
[submodule "contrib/libcxx"]
path = contrib/libcxx
url = https://github.com/ClickHouse-Extras/libcxx.git
url = https://github.com/ClickHouse/libcxx.git
[submodule "contrib/libcxxabi"]
path = contrib/libcxxabi
url = https://github.com/ClickHouse-Extras/libcxxabi.git
url = https://github.com/ClickHouse/libcxxabi.git
[submodule "contrib/snappy"]
path = contrib/snappy
url = https://github.com/ClickHouse-Extras/snappy.git
url = https://github.com/ClickHouse/snappy.git
[submodule "contrib/cppkafka"]
path = contrib/cppkafka
url = https://github.com/mfontanini/cppkafka.git
@@ -85,95 +85,95 @@
url = https://github.com/google/brotli.git
[submodule "contrib/h3"]
path = contrib/h3
url = https://github.com/ClickHouse-Extras/h3
url = https://github.com/ClickHouse/h3
[submodule "contrib/hyperscan"]
path = contrib/hyperscan
url = https://github.com/ClickHouse-Extras/hyperscan.git
url = https://github.com/ClickHouse/hyperscan.git
[submodule "contrib/libunwind"]
path = contrib/libunwind
url = https://github.com/ClickHouse-Extras/libunwind.git
url = https://github.com/ClickHouse/libunwind.git
[submodule "contrib/simdjson"]
path = contrib/simdjson
url = https://github.com/simdjson/simdjson.git
[submodule "contrib/rapidjson"]
path = contrib/rapidjson
url = https://github.com/ClickHouse-Extras/rapidjson
url = https://github.com/ClickHouse/rapidjson
[submodule "contrib/fastops"]
path = contrib/fastops
url = https://github.com/ClickHouse-Extras/fastops
url = https://github.com/ClickHouse/fastops
[submodule "contrib/orc"]
path = contrib/orc
url = https://github.com/ClickHouse-Extras/orc
url = https://github.com/ClickHouse/orc
[submodule "contrib/sparsehash-c11"]
path = contrib/sparsehash-c11
url = https://github.com/sparsehash/sparsehash-c11.git
[submodule "contrib/grpc"]
path = contrib/grpc
url = https://github.com/ClickHouse-Extras/grpc.git
url = https://github.com/ClickHouse/grpc.git
branch = v1.33.2
[submodule "contrib/aws"]
path = contrib/aws
url = https://github.com/ClickHouse-Extras/aws-sdk-cpp.git
url = https://github.com/ClickHouse/aws-sdk-cpp.git
[submodule "aws-c-event-stream"]
path = contrib/aws-c-event-stream
url = https://github.com/ClickHouse-Extras/aws-c-event-stream.git
url = https://github.com/ClickHouse/aws-c-event-stream.git
[submodule "aws-c-common"]
path = contrib/aws-c-common
url = https://github.com/ClickHouse-Extras/aws-c-common.git
url = https://github.com/ClickHouse/aws-c-common.git
[submodule "aws-checksums"]
path = contrib/aws-checksums
url = https://github.com/ClickHouse-Extras/aws-checksums.git
url = https://github.com/ClickHouse/aws-checksums.git
[submodule "contrib/curl"]
path = contrib/curl
url = https://github.com/curl/curl.git
[submodule "contrib/icudata"]
path = contrib/icudata
url = https://github.com/ClickHouse-Extras/icudata.git
url = https://github.com/ClickHouse/icudata.git
[submodule "contrib/icu"]
path = contrib/icu
url = https://github.com/unicode-org/icu.git
[submodule "contrib/flatbuffers"]
path = contrib/flatbuffers
url = https://github.com/ClickHouse-Extras/flatbuffers.git
url = https://github.com/ClickHouse/flatbuffers.git
[submodule "contrib/replxx"]
path = contrib/replxx
url = https://github.com/ClickHouse-Extras/replxx.git
url = https://github.com/ClickHouse/replxx.git
[submodule "contrib/avro"]
path = contrib/avro
url = https://github.com/ClickHouse-Extras/avro.git
url = https://github.com/ClickHouse/avro.git
ignore = untracked
[submodule "contrib/msgpack-c"]
path = contrib/msgpack-c
url = https://github.com/msgpack/msgpack-c
[submodule "contrib/libcpuid"]
path = contrib/libcpuid
url = https://github.com/ClickHouse-Extras/libcpuid.git
url = https://github.com/ClickHouse/libcpuid.git
[submodule "contrib/openldap"]
path = contrib/openldap
url = https://github.com/ClickHouse-Extras/openldap.git
url = https://github.com/ClickHouse/openldap.git
[submodule "contrib/AMQP-CPP"]
path = contrib/AMQP-CPP
url = https://github.com/ClickHouse-Extras/AMQP-CPP.git
url = https://github.com/ClickHouse/AMQP-CPP.git
[submodule "contrib/cassandra"]
path = contrib/cassandra
url = https://github.com/ClickHouse-Extras/cpp-driver.git
url = https://github.com/ClickHouse/cpp-driver.git
branch = clickhouse
[submodule "contrib/libuv"]
path = contrib/libuv
url = https://github.com/ClickHouse-Extras/libuv.git
url = https://github.com/ClickHouse/libuv.git
branch = clickhouse
[submodule "contrib/fmtlib"]
path = contrib/fmtlib
url = https://github.com/fmtlib/fmt.git
[submodule "contrib/sentry-native"]
path = contrib/sentry-native
url = https://github.com/ClickHouse-Extras/sentry-native.git
url = https://github.com/ClickHouse/sentry-native.git
[submodule "contrib/krb5"]
path = contrib/krb5
url = https://github.com/ClickHouse-Extras/krb5
url = https://github.com/ClickHouse/krb5
[submodule "contrib/cyrus-sasl"]
path = contrib/cyrus-sasl
url = https://github.com/ClickHouse-Extras/cyrus-sasl
url = https://github.com/ClickHouse/cyrus-sasl
branch = cyrus-sasl-2.1
[submodule "contrib/croaring"]
path = contrib/croaring
@@ -184,7 +184,7 @@
url = https://github.com/danlark1/miniselect
[submodule "contrib/rocksdb"]
path = contrib/rocksdb
url = https://github.com/ClickHouse-Extras/rocksdb.git
url = https://github.com/ClickHouse/rocksdb.git
[submodule "contrib/xz"]
path = contrib/xz
url = https://github.com/xz-mirror/xz
@@ -194,53 +194,53 @@
branch = lts_2021_11_02
[submodule "contrib/dragonbox"]
path = contrib/dragonbox
url = https://github.com/ClickHouse-Extras/dragonbox.git
url = https://github.com/ClickHouse/dragonbox.git
[submodule "contrib/fast_float"]
path = contrib/fast_float
url = https://github.com/fastfloat/fast_float
[submodule "contrib/libpq"]
path = contrib/libpq
url = https://github.com/ClickHouse-Extras/libpq
url = https://github.com/ClickHouse/libpq
[submodule "contrib/boringssl"]
path = contrib/boringssl
url = https://github.com/ClickHouse-Extras/boringssl.git
url = https://github.com/ClickHouse/boringssl.git
branch = MergeWithUpstream
[submodule "contrib/NuRaft"]
path = contrib/NuRaft
url = https://github.com/ClickHouse-Extras/NuRaft.git
url = https://github.com/ClickHouse/NuRaft.git
[submodule "contrib/nanodbc"]
path = contrib/nanodbc
url = https://github.com/ClickHouse-Extras/nanodbc.git
url = https://github.com/ClickHouse/nanodbc.git
[submodule "contrib/datasketches-cpp"]
path = contrib/datasketches-cpp
url = https://github.com/ClickHouse-Extras/datasketches-cpp.git
url = https://github.com/ClickHouse/datasketches-cpp.git
[submodule "contrib/yaml-cpp"]
path = contrib/yaml-cpp
url = https://github.com/ClickHouse-Extras/yaml-cpp.git
url = https://github.com/ClickHouse/yaml-cpp.git
[submodule "contrib/cld2"]
path = contrib/cld2
url = https://github.com/ClickHouse-Extras/cld2.git
url = https://github.com/ClickHouse/cld2.git
[submodule "contrib/libstemmer_c"]
path = contrib/libstemmer_c
url = https://github.com/ClickHouse-Extras/libstemmer_c.git
url = https://github.com/ClickHouse/libstemmer_c.git
[submodule "contrib/wordnet-blast"]
path = contrib/wordnet-blast
url = https://github.com/ClickHouse-Extras/wordnet-blast.git
url = https://github.com/ClickHouse/wordnet-blast.git
[submodule "contrib/lemmagen-c"]
path = contrib/lemmagen-c
url = https://github.com/ClickHouse-Extras/lemmagen-c.git
url = https://github.com/ClickHouse/lemmagen-c.git
[submodule "contrib/libpqxx"]
path = contrib/libpqxx
url = https://github.com/ClickHouse-Extras/libpqxx.git
url = https://github.com/ClickHouse/libpqxx.git
[submodule "contrib/sqlite-amalgamation"]
path = contrib/sqlite-amalgamation
url = https://github.com/azadkuh/sqlite-amalgamation
[submodule "contrib/s2geometry"]
path = contrib/s2geometry
url = https://github.com/ClickHouse-Extras/s2geometry.git
url = https://github.com/ClickHouse/s2geometry.git
[submodule "contrib/bzip2"]
path = contrib/bzip2
url = https://github.com/ClickHouse-Extras/bzip2.git
url = https://github.com/ClickHouse/bzip2.git
[submodule "contrib/magic_enum"]
path = contrib/magic_enum
url = https://github.com/Neargye/magic_enum
@@ -249,16 +249,16 @@
url = https://github.com/google/libprotobuf-mutator
[submodule "contrib/sysroot"]
path = contrib/sysroot
url = https://github.com/ClickHouse-Extras/sysroot.git
url = https://github.com/ClickHouse/sysroot.git
[submodule "contrib/nlp-data"]
path = contrib/nlp-data
url = https://github.com/ClickHouse-Extras/nlp-data.git
url = https://github.com/ClickHouse/nlp-data.git
[submodule "contrib/hive-metastore"]
path = contrib/hive-metastore
url = https://github.com/ClickHouse-Extras/hive-metastore
url = https://github.com/ClickHouse/hive-metastore
[submodule "contrib/azure"]
path = contrib/azure
url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git
url = https://github.com/ClickHouse/azure-sdk-for-cpp.git
[submodule "contrib/minizip-ng"]
path = contrib/minizip-ng
url = https://github.com/zlib-ng/minizip-ng
CHANGELOG.md: 138 changed lines
@@ -1,4 +1,138 @@
### ClickHouse release v22.2, 2022-02-17
### Table of Contents
**[ClickHouse release v22.3-lts, 2022-03-17](#223)**<br>
**[ClickHouse release v22.2, 2022-02-17](#222)**<br>
**[ClickHouse release v22.1, 2022-01-18](#221)**<br>
**[Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md)**<br>
## <a id="223"></a> ClickHouse release v22.3-lts, 2022-03-17
#### Backward Incompatible Change
* Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)).
* Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)).
#### New Feature
* Support for caching data locally for remote filesystems. It can be enabled for `s3` disks. Closes [#28961](https://github.com/ClickHouse/ClickHouse/issues/28961). [#33717](https://github.com/ClickHouse/ClickHouse/pull/33717) ([Kseniia Sumarokova](https://github.com/kssenii)). In the meantime, we have enabled the test suite on the s3 filesystem and no more known issues exist, so it is starting to become production ready.
* Add new table function `hive`. It can be used as follows `hive('<hive metastore url>', '<hive database>', '<hive table name>', '<columns definition>', '<partition columns>')` for example `SELECT * FROM hive('thrift://hivetest:9083', 'test', 'demo', 'id Nullable(String), score Nullable(Int32), day Nullable(String)', 'day')`. [#34946](https://github.com/ClickHouse/ClickHouse/pull/34946) ([lgbo](https://github.com/lgbo-ustc)).
* Support authentication of users connected via SSL by their X.509 certificate. [#31484](https://github.com/ClickHouse/ClickHouse/pull/31484) ([eungenue](https://github.com/eungenue)).
* Support schema inference for inserting into table functions `file`/`hdfs`/`s3`/`url`. [#34732](https://github.com/ClickHouse/ClickHouse/pull/34732) ([Kruglov Pavel](https://github.com/Avogar)).
* Now you can read the `system.zookeeper` table without restrictions on path or using a `like` expression. These reads can generate quite a heavy load on ZooKeeper, so to enable this ability you have to enable the setting `allow_unrestricted_reads_from_keeper`. [#34609](https://github.com/ClickHouse/ClickHouse/pull/34609) ([Sergei Trifonov](https://github.com/serxa)).
* Display CPU and memory metrics in clickhouse-local. Close [#34545](https://github.com/ClickHouse/ClickHouse/issues/34545). [#34605](https://github.com/ClickHouse/ClickHouse/pull/34605) ([李扬](https://github.com/taiyang-li)).
* Implement `startsWith` and `endsWith` function for arrays, closes [#33982](https://github.com/ClickHouse/ClickHouse/issues/33982). [#34368](https://github.com/ClickHouse/ClickHouse/pull/34368) ([usurai](https://github.com/usurai)).
* Add three functions for Map data type: 1. `mapReplace(map1, map2)` - replaces values for keys in map1 with the values of the corresponding keys in map2; adds keys from map2 that don't exist in map1. 2. `mapFilter` 3. `mapMap`. mapFilter and mapMap are higher order functions, accepting two arguments, the first argument is a lambda function with k, v pair as arguments, the second argument is a column of type Map. [#33698](https://github.com/ClickHouse/ClickHouse/pull/33698) ([hexiaoting](https://github.com/hexiaoting)).
* Allow getting default user and password for clickhouse-client from the `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables. Close [#34538](https://github.com/ClickHouse/ClickHouse/issues/34538). [#34947](https://github.com/ClickHouse/ClickHouse/pull/34947) ([DR](https://github.com/freedomDR)).
#### Experimental Feature
* New data type `Object(<schema_format>)`, which supports storing of semi-structured data (for now JSON only). Data is written to such types as string. Then all paths are extracted according to format of semi-structured data and written as separate columns in most optimal types, that can store all their values. Those columns can be queried by names that match paths in source data. E.g `data.key1.key2` or with cast operator `data.key1.key2::Int64`.
* Add `database_replicated_allow_only_replicated_engine` setting. When enabled, only `Replicated` tables or tables with stateless engines can be created in `Replicated` databases. [#35214](https://github.com/ClickHouse/ClickHouse/pull/35214) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). Note that the `Replicated` database is still an experimental feature.
#### Performance Improvement
* Improve performance of insertion into `MergeTree` tables by optimizing sorting. Up to 2x improvement is observed on realistic benchmarks. [#34750](https://github.com/ClickHouse/ClickHouse/pull/34750) ([Maksim Kita](https://github.com/kitaisreal)).
* Columns pruning when reading Parquet, ORC and Arrow files from URL and S3. Closes [#34163](https://github.com/ClickHouse/ClickHouse/issues/34163). [#34849](https://github.com/ClickHouse/ClickHouse/pull/34849) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Columns pruning when reading Parquet, ORC and Arrow files from Hive. [#34954](https://github.com/ClickHouse/ClickHouse/pull/34954) ([lgbo](https://github.com/lgbo-ustc)).
* A bunch of performance optimizations from a performance superhero. Improve performance of processing queries with large `IN` section. Improve performance of `direct` dictionary if its source is `ClickHouse`. Improve performance of `detectCharset `, `detectLanguageUnknown ` functions. [#34888](https://github.com/ClickHouse/ClickHouse/pull/34888) ([Maksim Kita](https://github.com/kitaisreal)).
* Improve performance of `any` aggregate function by using more batching. [#34760](https://github.com/ClickHouse/ClickHouse/pull/34760) ([Raúl Marín](https://github.com/Algunenano)).
* Multiple improvements for performance of `clickhouse-keeper`: less locking [#35010](https://github.com/ClickHouse/ClickHouse/pull/35010) ([zhanglistar](https://github.com/zhanglistar)), lower memory usage by streaming reading and writing of snapshot instead of full copy. [#34584](https://github.com/ClickHouse/ClickHouse/pull/34584) ([zhanglistar](https://github.com/zhanglistar)), optimizing compaction of log store in the RAFT implementation. [#34534](https://github.com/ClickHouse/ClickHouse/pull/34534) ([zhanglistar](https://github.com/zhanglistar)), versioning of the internal data structure [#34486](https://github.com/ClickHouse/ClickHouse/pull/34486) ([zhanglistar](https://github.com/zhanglistar)).
#### Improvement
* Allow asynchronous inserts to table functions. Fixes [#34864](https://github.com/ClickHouse/ClickHouse/issues/34864). [#34866](https://github.com/ClickHouse/ClickHouse/pull/34866) ([Anton Popov](https://github.com/CurtizJ)).
* Implicit type casting of the key argument for functions `dictGetHierarchy`, `dictIsIn`, `dictGetChildren`, `dictGetDescendants`. Closes [#34970](https://github.com/ClickHouse/ClickHouse/issues/34970). [#35027](https://github.com/ClickHouse/ClickHouse/pull/35027) ([Maksim Kita](https://github.com/kitaisreal)).
* `EXPLAIN AST` query can output AST in form of a graph in Graphviz format: `EXPLAIN AST graph = 1 SELECT * FROM system.parts`. [#35173](https://github.com/ClickHouse/ClickHouse/pull/35173) ([李扬](https://github.com/taiyang-li)).
* When large files were written with `s3` table function or table engine, the content type on the files was mistakenly set to `application/xml` due to a bug in the AWS SDK. This closes [#33964](https://github.com/ClickHouse/ClickHouse/issues/33964). [#34433](https://github.com/ClickHouse/ClickHouse/pull/34433) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Change restrictive row policies a bit to make them an easier alternative to permissive policies in easy cases. If for a particular table only restrictive policies exist (without permissive policies) users will be able to see some rows. Also `SHOW CREATE ROW POLICY` will always show `AS permissive` or `AS restrictive` in row policy's definition. [#34596](https://github.com/ClickHouse/ClickHouse/pull/34596) ([Vitaly Baranov](https://github.com/vitlibar)).
* Improve schema inference with globs in File/S3/HDFS/URL engines. Try to use the next path for schema inference in case of error. [#34465](https://github.com/ClickHouse/ClickHouse/pull/34465) ([Kruglov Pavel](https://github.com/Avogar)).
* Play UI now correctly detects the preferred light/dark theme from the OS. [#35068](https://github.com/ClickHouse/ClickHouse/pull/35068) ([peledni](https://github.com/peledni)).
* Added `date_time_input_format = 'best_effort_us'`. Closes [#34799](https://github.com/ClickHouse/ClickHouse/issues/34799). [#34982](https://github.com/ClickHouse/ClickHouse/pull/34982) ([WenYao](https://github.com/Cai-Yao)).
* New settings called `allow_plaintext_password` and `allow_no_password` are added to the server configuration; they turn on/off authentication types that can be potentially insecure in some environments. They are allowed by default. [#34738](https://github.com/ClickHouse/ClickHouse/pull/34738) ([Heena Bansal](https://github.com/HeenaBansal2009)).
* Support for `DateTime64` data type in `Arrow` format, closes [#8280](https://github.com/ClickHouse/ClickHouse/issues/8280) and closes [#28574](https://github.com/ClickHouse/ClickHouse/issues/28574). [#34561](https://github.com/ClickHouse/ClickHouse/pull/34561) ([李扬](https://github.com/taiyang-li)).
* Reload `remote_url_allow_hosts` (filtering of outgoing connections) on config update. [#35294](https://github.com/ClickHouse/ClickHouse/pull/35294) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Support `--testmode` parameter for `clickhouse-local`. This parameter enables interpretation of test hints that we use in functional tests. [#35264](https://github.com/ClickHouse/ClickHouse/pull/35264) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add `distributed_depth` to query log. It is like a more detailed variant of `is_initial_query` [#35207](https://github.com/ClickHouse/ClickHouse/pull/35207) ([李扬](https://github.com/taiyang-li)).
* Respect `remote_url_allow_hosts` for `MySQL` and `PostgreSQL` table functions. [#35191](https://github.com/ClickHouse/ClickHouse/pull/35191) ([Heena Bansal](https://github.com/HeenaBansal2009)).
* Added `disk_name` field to `system.part_log`. [#35178](https://github.com/ClickHouse/ClickHouse/pull/35178) ([Artyom Yurkov](https://github.com/Varinara)).
* Do not retry non-retriable errors when querying remote URLs. Closes [#35161](https://github.com/ClickHouse/ClickHouse/issues/35161). [#35172](https://github.com/ClickHouse/ClickHouse/pull/35172) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Support distributed INSERT SELECT queries (the setting `parallel_distributed_insert_select`) with the table function `view()`. [#35132](https://github.com/ClickHouse/ClickHouse/pull/35132) ([Azat Khuzhin](https://github.com/azat)).
* More precise memory tracking during `INSERT` into `Buffer` with `AggregateFunction`. [#35072](https://github.com/ClickHouse/ClickHouse/pull/35072) ([Azat Khuzhin](https://github.com/azat)).
* Avoid division by zero in Query Profiler if Linux kernel has a bug. Closes [#34787](https://github.com/ClickHouse/ClickHouse/issues/34787). [#35032](https://github.com/ClickHouse/ClickHouse/pull/35032) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add more sanity checks for keeper configuration: now mixing of localhost and non-local servers is not allowed, also add checks for same value of internal raft port and keeper client port. [#35004](https://github.com/ClickHouse/ClickHouse/pull/35004) ([alesapin](https://github.com/alesapin)).
* Currently, if the user changes the settings of the system tables there will be tons of logs and ClickHouse will rename the tables every minute. This fixes [#34929](https://github.com/ClickHouse/ClickHouse/issues/34929). [#34949](https://github.com/ClickHouse/ClickHouse/pull/34949) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Use connection pool for Hive metastore client. [#34940](https://github.com/ClickHouse/ClickHouse/pull/34940) ([lgbo](https://github.com/lgbo-ustc)).
* Ignore per-column `TTL` in `CREATE TABLE AS` if new table engine does not support it (i.e. if the engine is not of `MergeTree` family). [#34938](https://github.com/ClickHouse/ClickHouse/pull/34938) ([Azat Khuzhin](https://github.com/azat)).
* Allow `LowCardinality` strings for `ngrambf_v1`/`tokenbf_v1` indexes. Closes [#21865](https://github.com/ClickHouse/ClickHouse/issues/21865). [#34911](https://github.com/ClickHouse/ClickHouse/pull/34911) ([Lars Hiller Eidnes](https://github.com/larspars)).
* Allow opening empty sqlite db if the file doesn't exist. Closes [#33367](https://github.com/ClickHouse/ClickHouse/issues/33367). [#34907](https://github.com/ClickHouse/ClickHouse/pull/34907) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Implement memory statistics for FreeBSD - this is required for `max_server_memory_usage` to work correctly. [#34902](https://github.com/ClickHouse/ClickHouse/pull/34902) ([Alexandre Snarskii](https://github.com/snar)).
* In previous versions the progress bar in clickhouse-client can jump forward near 50% for no reason. This closes [#34324](https://github.com/ClickHouse/ClickHouse/issues/34324). [#34801](https://github.com/ClickHouse/ClickHouse/pull/34801) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Now `ALTER TABLE DROP COLUMN columnX` queries for `MergeTree` table engines will work instantly when `columnX` is an `ALIAS` column. Fixes [#34660](https://github.com/ClickHouse/ClickHouse/issues/34660). [#34786](https://github.com/ClickHouse/ClickHouse/pull/34786) ([alesapin](https://github.com/alesapin)).
* Show hints when user mistyped the name of a data skipping index. Closes [#29698](https://github.com/ClickHouse/ClickHouse/issues/29698). [#34764](https://github.com/ClickHouse/ClickHouse/pull/34764) ([flynn](https://github.com/ucasfl)).
* Support `remote()`/`cluster()` table functions for `parallel_distributed_insert_select`. [#34728](https://github.com/ClickHouse/ClickHouse/pull/34728) ([Azat Khuzhin](https://github.com/azat)).
* Do not reset logging that configured via `--log-file`/`--errorlog-file` command line options in case of empty configuration in the config file. [#34718](https://github.com/ClickHouse/ClickHouse/pull/34718) ([Amos Bird](https://github.com/amosbird)).
* Extract schema only once on table creation and prevent reading from local files/external sources to extract schema on each server startup. [#34684](https://github.com/ClickHouse/ClickHouse/pull/34684) ([Kruglov Pavel](https://github.com/Avogar)).
* Allow specifying argument names for executable UDFs. This is necessary for formats where argument name is part of serialization, like `Native`, `JSONEachRow`. Closes [#34604](https://github.com/ClickHouse/ClickHouse/issues/34604). [#34653](https://github.com/ClickHouse/ClickHouse/pull/34653) ([Maksim Kita](https://github.com/kitaisreal)).
* `MaterializedMySQL` (experimental feature) now supports `materialized_mysql_tables_list` (a comma-separated list of MySQL database tables, which will be replicated by the MaterializedMySQL database engine. Default value: empty list — means all the tables will be replicated), mentioned at [#32977](https://github.com/ClickHouse/ClickHouse/issues/32977). [#34487](https://github.com/ClickHouse/ClickHouse/pull/34487) ([zzsmdfj](https://github.com/zzsmdfj)).
* Improve OpenTelemetry span logs for INSERT operation on distributed table. [#34480](https://github.com/ClickHouse/ClickHouse/pull/34480) ([Frank Chen](https://github.com/FrankChen021)).
* Make the znode `ctime` and `mtime` consistent between servers in ClickHouse Keeper. [#33441](https://github.com/ClickHouse/ClickHouse/pull/33441) ([小路](https://github.com/nicelulu)).
#### Build/Testing/Packaging Improvement
* Package repository is migrated to JFrog Artifactory (**Mikhail f. Shiryaev**).
* Randomize some settings in functional tests, so more possible combinations of settings will be tested. This is yet another fuzzing method to ensure better test coverage. This closes [#32268](https://github.com/ClickHouse/ClickHouse/issues/32268). [#34092](https://github.com/ClickHouse/ClickHouse/pull/34092) ([Kruglov Pavel](https://github.com/Avogar)).
* Drop PVS-Studio from our CI. [#34680](https://github.com/ClickHouse/ClickHouse/pull/34680) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add an ability to build stripped binaries with CMake. In previous versions it was performed by dh-tools. [#35196](https://github.com/ClickHouse/ClickHouse/pull/35196) ([alesapin](https://github.com/alesapin)).
* Smaller "fat-free" `clickhouse-keeper` build. [#35031](https://github.com/ClickHouse/ClickHouse/pull/35031) ([alesapin](https://github.com/alesapin)).
* Use @robot-clickhouse as an author and committer for PRs like https://github.com/ClickHouse/ClickHouse/pull/34685. [#34793](https://github.com/ClickHouse/ClickHouse/pull/34793) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Limit DWARF version for debug info by 4 max, because our internal stack symbolizer cannot parse DWARF version 5. This makes sense if you compile ClickHouse with clang-15. [#34777](https://github.com/ClickHouse/ClickHouse/pull/34777) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove `clickhouse-test` debian package as an unneeded complication. CI uses tests from the repository, and standalone testing via the deb package is no longer supported. [#34606](https://github.com/ClickHouse/ClickHouse/pull/34606) ([Ilya Yatsishin](https://github.com/qoega)).
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* A fix for HDFS integration: when the inner buffer size is too small, NEED_MORE_INPUT in `HadoopSnappyDecoder` will run multiple times (>= 3) for one compressed block. This caused the input data to be copied into the wrong place in `HadoopSnappyDecoder::buffer`. [#35116](https://github.com/ClickHouse/ClickHouse/pull/35116) ([lgbo](https://github.com/lgbo-ustc)).
* Ignore obsolete grants in ATTACH GRANT statements. This PR fixes [#34815](https://github.com/ClickHouse/ClickHouse/issues/34815). [#34855](https://github.com/ClickHouse/ClickHouse/pull/34855) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix segfault in Postgres database when getting create table query if database was created using named collections. Closes [#35312](https://github.com/ClickHouse/ClickHouse/issues/35312). [#35313](https://github.com/ClickHouse/ClickHouse/pull/35313) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix partial merge join duplicate rows bug, close [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). [#35311](https://github.com/ClickHouse/ClickHouse/pull/35311) ([Vladimir C](https://github.com/vdimir)).
* Fix possible `Assertion 'position() != working_buffer.end()' failed` while using bzip2 compression with small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35300](https://github.com/ClickHouse/ClickHouse/pull/35300) ([Kruglov Pavel](https://github.com/Avogar)). While using lz4 compression with a small max_read_buffer_size setting value. [#35296](https://github.com/ClickHouse/ClickHouse/pull/35296) ([Kruglov Pavel](https://github.com/Avogar)). While using lzma compression with small `max_read_buffer_size` setting value. [#35295](https://github.com/ClickHouse/ClickHouse/pull/35295) ([Kruglov Pavel](https://github.com/Avogar)). While using `brotli` compression with a small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35281](https://github.com/ClickHouse/ClickHouse/pull/35281) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible segfault in `JSONEachRow` schema inference. [#35291](https://github.com/ClickHouse/ClickHouse/pull/35291) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix `CHECK TABLE` query in case when sparse columns are enabled in table. [#35274](https://github.com/ClickHouse/ClickHouse/pull/35274) ([Anton Popov](https://github.com/CurtizJ)).
* Avoid std::terminate in case of exception in reading from remote VFS. [#35257](https://github.com/ClickHouse/ClickHouse/pull/35257) ([Azat Khuzhin](https://github.com/azat)).
* Fix reading port from config, close [#34776](https://github.com/ClickHouse/ClickHouse/issues/34776). [#35193](https://github.com/ClickHouse/ClickHouse/pull/35193) ([Vladimir C](https://github.com/vdimir)).
* Fix error in query with `WITH TOTALS` in case if `HAVING` returned empty result. This fixes [#33711](https://github.com/ClickHouse/ClickHouse/issues/33711). [#35186](https://github.com/ClickHouse/ClickHouse/pull/35186) ([Amos Bird](https://github.com/amosbird)).
* Fix a corner case of `replaceRegexpAll`, close [#35117](https://github.com/ClickHouse/ClickHouse/issues/35117). [#35182](https://github.com/ClickHouse/ClickHouse/pull/35182) ([Vladimir C](https://github.com/vdimir)).
* Schema inference didn't work properly in the case of `INSERT INTO FUNCTION s3(...) FROM ...`: it tried to read the schema from the s3 file instead of from the select query. [#35176](https://github.com/ClickHouse/ClickHouse/pull/35176) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix MaterializedPostgreSQL (experimental feature) `table overrides` for partition by, etc. Closes [#35048](https://github.com/ClickHouse/ClickHouse/issues/35048). [#35162](https://github.com/ClickHouse/ClickHouse/pull/35162) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix MaterializedPostgreSQL (experimental feature) adding new table to replication (ATTACH TABLE) after manually removing (DETACH TABLE). Closes [#33800](https://github.com/ClickHouse/ClickHouse/issues/33800). Closes [#34922](https://github.com/ClickHouse/ClickHouse/issues/34922). Closes [#34315](https://github.com/ClickHouse/ClickHouse/issues/34315). [#35158](https://github.com/ClickHouse/ClickHouse/pull/35158) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix partition pruning error when non-monotonic function is used with IN operator. This fixes [#35136](https://github.com/ClickHouse/ClickHouse/issues/35136). [#35146](https://github.com/ClickHouse/ClickHouse/pull/35146) ([Amos Bird](https://github.com/amosbird)).
* Fixed slightly incorrect translation of YAML configs to XML. [#35135](https://github.com/ClickHouse/ClickHouse/pull/35135) ([Miel Donkers](https://github.com/mdonkers)).
* Fix `optimize_skip_unused_shards_rewrite_in` for signed columns and negative values. [#35134](https://github.com/ClickHouse/ClickHouse/pull/35134) ([Azat Khuzhin](https://github.com/azat)).
* The `update_lag` external dictionary configuration option was unusable showing the error message ``Unexpected key `update_lag` in dictionary source configuration``. [#35089](https://github.com/ClickHouse/ClickHouse/pull/35089) ([Jason Chu](https://github.com/1lann)).
* Avoid possible deadlock on server shutdown. [#35081](https://github.com/ClickHouse/ClickHouse/pull/35081) ([Azat Khuzhin](https://github.com/azat)).
* Fix missing alias after function is optimized to a subcolumn when setting `optimize_functions_to_subcolumns` is enabled. Closes [#33798](https://github.com/ClickHouse/ClickHouse/issues/33798). [#35079](https://github.com/ClickHouse/ClickHouse/pull/35079) ([qieqieplus](https://github.com/qieqieplus)).
* Fix reading from `system.asynchronous_inserts` table if there exists asynchronous insert into table function. [#35050](https://github.com/ClickHouse/ClickHouse/pull/35050) ([Anton Popov](https://github.com/CurtizJ)).
* Fix possible exception `Reading for MergeTree family tables must be done with last position boundary` (relevant to operation on remote VFS). Closes [#34979](https://github.com/ClickHouse/ClickHouse/issues/34979). [#35001](https://github.com/ClickHouse/ClickHouse/pull/35001) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix unexpected result when using a -State type aggregate function in a window frame. [#34999](https://github.com/ClickHouse/ClickHouse/pull/34999) ([metahys](https://github.com/metahys)).
* Fix possible segfault in FileLog (experimental feature). Closes [#30749](https://github.com/ClickHouse/ClickHouse/issues/30749). [#34996](https://github.com/ClickHouse/ClickHouse/pull/34996) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix possible rare error `Cannot push block to port which already has data`. [#34993](https://github.com/ClickHouse/ClickHouse/pull/34993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix wrong schema inference for unquoted dates in CSV. Closes [#34768](https://github.com/ClickHouse/ClickHouse/issues/34768). [#34961](https://github.com/ClickHouse/ClickHouse/pull/34961) ([Kruglov Pavel](https://github.com/Avogar)).
* Integration with Hive: fix unexpected result when using `in` in `where` in a Hive query. [#34945](https://github.com/ClickHouse/ClickHouse/pull/34945) ([lgbo](https://github.com/lgbo-ustc)).
* Avoid busy polling in ClickHouse Keeper while searching for changelog files to delete. [#34931](https://github.com/ClickHouse/ClickHouse/pull/34931) ([Azat Khuzhin](https://github.com/azat)).
* Fix DateTime64 conversion from PostgreSQL. Closes [#33364](https://github.com/ClickHouse/ClickHouse/issues/33364). [#34910](https://github.com/ClickHouse/ClickHouse/pull/34910) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix possible "Part directory doesn't exist" during `INSERT` into MergeTree table backed by VFS over s3. [#34876](https://github.com/ClickHouse/ClickHouse/pull/34876) ([Azat Khuzhin](https://github.com/azat)).
* Support DDLs like CREATE USER to be executed on cross replicated cluster. [#34860](https://github.com/ClickHouse/ClickHouse/pull/34860) ([Jianmei Zhang](https://github.com/zhangjmruc)).
* Fix bugs for multiple columns group by in `WindowView` (experimental feature). [#34859](https://github.com/ClickHouse/ClickHouse/pull/34859) ([vxider](https://github.com/Vxider)).
* Fix possible failures in S2 functions when queries contain const columns. [#34745](https://github.com/ClickHouse/ClickHouse/pull/34745) ([Bharat Nallan](https://github.com/bharatnc)).
* Fix bug for H3 funcs containing const columns which cause queries to fail. [#34743](https://github.com/ClickHouse/ClickHouse/pull/34743) ([Bharat Nallan](https://github.com/bharatnc)).
* Fix `No such file or directory` with enabled `fsync_part_directory` and vertical merge. [#34739](https://github.com/ClickHouse/ClickHouse/pull/34739) ([Azat Khuzhin](https://github.com/azat)).
* Fix serialization/printing for system queries `RELOAD MODEL`, `RELOAD FUNCTION`, `RESTART DISK` when used `ON CLUSTER`. Closes [#34514](https://github.com/ClickHouse/ClickHouse/issues/34514). [#34696](https://github.com/ClickHouse/ClickHouse/pull/34696) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix `allow_experimental_projection_optimization` with `enable_global_with_statement` (before, it could lead to a `Stack size too large` error in case of multiple expressions in the `WITH` clause, and it also executed scalar subqueries again and again, so now it will be more optimal). [#34650](https://github.com/ClickHouse/ClickHouse/pull/34650) ([Azat Khuzhin](https://github.com/azat)).
* Stop selecting a part for mutation when the other replica has already updated the transaction log for the `ReplicatedMergeTree` engine. [#34633](https://github.com/ClickHouse/ClickHouse/pull/34633) ([Jianmei Zhang](https://github.com/zhangjmruc)).
* Fix incorrect result of trivial count query when part movement feature is used [#34089](https://github.com/ClickHouse/ClickHouse/issues/34089). [#34385](https://github.com/ClickHouse/ClickHouse/pull/34385) ([nvartolomei](https://github.com/nvartolomei)).
* Fix inconsistency of `max_query_size` limitation in distributed subqueries. [#34078](https://github.com/ClickHouse/ClickHouse/pull/34078) ([Chao Ma](https://github.com/godliness)).
### <a id="222"></a> ClickHouse release v22.2, 2022-02-17
#### Upgrade Notes
@@ -174,7 +308,7 @@
* This PR allows using multiple LDAP storages in the same list of user directories. It worked earlier but was broken because LDAP tests are disabled (they are part of the testflows tests). [#33574](https://github.com/ClickHouse/ClickHouse/pull/33574) ([Vitaly Baranov](https://github.com/vitlibar)).
### ClickHouse release v22.1, 2022-01-18
### <a id="221"></a> ClickHouse release v22.1, 2022-01-18
#### Upgrade Notes
@@ -62,8 +62,8 @@ set(CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a pos
# For more info see https://cmake.org/cmake/help/latest/prop_gbl/USE_FOLDERS.html
set_property(GLOBAL PROPERTY USE_FOLDERS ON)

# Check that submodules are present only if source was downloaded with git
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/boost")
# Check that submodules are present
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/sysroot/README.md")
    message (FATAL_ERROR "Submodules are not initialized. Run\n\tgit submodule update --init --recursive")
endif ()

@@ -248,7 +248,9 @@ endif()
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
    set(USE_DEBUG_HELPERS ON)
endif()

option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS})
option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF)

# Create BuildID when using lld. For other linkers it is created by default.
if (LINKER_NAME MATCHES "lld$")
@@ -263,6 +265,14 @@ if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
    set (USE_BINARY_HASH 1)
endif ()

# Allows to build stripped binary in a separate directory
if (OBJCOPY_PATH AND READELF_PATH)
    option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
    if (INSTALL_STRIPPED_BINARIES)
        set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
    endif()
endif()

cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd


@@ -285,8 +295,13 @@ include(cmake/cpu_features.cmake)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -fasynchronous-unwind-tables")

# Reproducible builds
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
# If turned `ON`, remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE().
option(ENABLE_BUILD_PATH_MAPPING "Enable remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE(). It's to generate reproducible builds. See https://reproducible-builds.org/docs/build-path" ON)

if (ENABLE_BUILD_PATH_MAPPING)
    set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
    set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
endif()

if (${CMAKE_VERSION} VERSION_LESS "3.12.4")
    # CMake < 3.12 doesn't support setting 20 as a C++ standard version.
@@ -340,16 +355,19 @@ if (WITH_COVERAGE AND COMPILER_GCC)
    set(WITHOUT_COVERAGE "-fno-profile-arcs -fno-test-coverage")
endif()

set(COMPILER_FLAGS "${COMPILER_FLAGS}")
set (COMPILER_FLAGS "${COMPILER_FLAGS}")

# Our built-in unwinder only supports DWARF version up to 4.
set (DEBUG_INFO_FLAGS "-g -gdwarf-4")

set (CMAKE_BUILD_COLOR_MAKEFILE ON)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${PLATFORM_EXTRA_CXX_FLAG} ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS}")
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_CXX_FLAGS_ADD}")
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_CXX_FLAGS_ADD}")
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_CXX_FLAGS_ADD}")

set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${COMMON_WARNING_FLAGS} ${CMAKE_C_FLAGS_ADD}")
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_C_FLAGS_ADD}")
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}")
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}")
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_C_FLAGS_ADD}")

if (COMPILER_CLANG)
    if (OS_DARWIN)
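As an aside (not part of the diff), a tiny C++ program can show what the `ENABLE_BUILD_PATH_MAPPING` option described above changes: with `-ffile-prefix-map=<source dir>=.` both `__FILE__` and the Clang/GCC builtin `__builtin_FILE()` report a remapped, relative path, which is what makes the binaries reproducible across build directories. The program below is a hedged illustration, not repository code.

```cpp
#include <cstdio>

// Build once without and once with -ffile-prefix-map=$PWD=. and compare:
// the remapped build prints a relative path instead of an absolute one.
int main()
{
    std::printf("__FILE__         = %s\n", __FILE__);
    std::printf("__builtin_FILE() = %s\n", __builtin_FILE());
    return 0;
}
```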
@@ -10,6 +10,6 @@ ClickHouse® is an open-source column-oriented database management system that a
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
* [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any.
@ -15,7 +15,7 @@ The following versions of ClickHouse server are currently being supported with s
| 20.x | :x: |
| 21.1 | :x: |
| 21.2 | :x: |
| 21.3 | ✅ |
| 21.3 | :x: |
| 21.4 | :x: |
| 21.5 | :x: |
| 21.6 | :x: |
@ -23,9 +23,11 @@ The following versions of ClickHouse server are currently being supported with s
| 21.8 | ✅ |
| 21.9 | :x: |
| 21.10 | :x: |
| 21.11 | ✅ |
| 21.12 | ✅ |
| 21.11 | :x: |
| 21.12 | :x: |
| 22.1 | ✅ |
| 22.2 | ✅ |
| 22.3 | ✅ |

## Reporting a Vulnerability
@ -17,6 +17,7 @@ set (SRCS
    terminalColors.cpp
    errnoToString.cpp
    StringRef.cpp
    safeExit.cpp
)

if (ENABLE_REPLXX)
@ -19,6 +19,12 @@
#if defined(__SSE4_2__)
#include <smmintrin.h>
#include <nmmintrin.h>
#define CRC_INT _mm_crc32_u64
#endif

#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
#include <arm_acle.h>
#define CRC_INT __crc32cd
#endif

@ -40,9 +46,9 @@ struct StringRef

    constexpr StringRef(const char * data_, size_t size_) : data(data_), size(size_) {}

    StringRef(const std::string & s) : data(s.data()), size(s.size()) {}
    StringRef(const std::string & s) : data(s.data()), size(s.size()) {} /// NOLINT
    constexpr explicit StringRef(std::string_view s) : data(s.data()), size(s.size()) {}
    constexpr StringRef(const char * data_) : StringRef(std::string_view{data_}) {}
    constexpr StringRef(const char * data_) : StringRef(std::string_view{data_}) {} /// NOLINT
    constexpr StringRef() = default;

    std::string toString() const { return std::string(data, size); }
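The two `/// NOLINT` markers keep clang-tidy quiet about the intentionally implicit constructors (most likely the explicit-constructor check); implicit conversion from `std::string` and from string literals is exactly what makes `StringRef` convenient to pass around. A small illustrative sketch of why the implicit constructors matter — `Ref` and `printRef` are hypothetical stand-ins, not ClickHouse code:

```
#include <cstddef>
#include <iostream>
#include <string>
#include <string_view>

/// Simplified stand-in mirroring the constructors in the hunk above.
struct Ref
{
    const char * data = nullptr;
    std::size_t size = 0;

    Ref(const std::string & s) : data(s.data()), size(s.size()) {} /// NOLINT(google-explicit-constructor)
    constexpr explicit Ref(std::string_view s) : data(s.data()), size(s.size()) {}
    constexpr Ref(const char * data_) : Ref(std::string_view{data_}) {} /// NOLINT(google-explicit-constructor)
};

void printRef(Ref r) /// hypothetical helper taking the view by value
{
    std::cout.write(r.data, static_cast<std::streamsize>(r.size)) << '\n';
}

int main()
{
    std::string s = "from std::string";
    printRef(s);                                 /// implicit conversion, allowed on purpose
    printRef("from a literal");                  /// implicit conversion from const char *
    printRef(Ref(std::string_view{"explicit"})); /// string_view still requires an explicit construction
}
```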
@ -205,7 +211,7 @@ struct StringRefHash64
    }
};

#if defined(__SSE4_2__)
#if defined(CRC_INT)

/// Parts are taken from CityHash.

@ -281,13 +287,13 @@ struct CRC32Hash
        do
        {
            UInt64 word = unalignedLoad<UInt64>(pos);
            res = _mm_crc32_u64(res, word);
            res = CRC_INT(res, word);

            pos += 8;
        } while (pos + 8 < end);

        UInt64 word = unalignedLoad<UInt64>(end - 8); /// I'm not sure if this is normal.
        res = _mm_crc32_u64(res, word);
        res = CRC_INT(res, word);

        return res;
    }
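The hunk above only swaps the hard-coded `_mm_crc32_u64` for the new `CRC_INT` macro, so the same word-at-a-time loop now compiles to the SSE4.2 instruction on x86 and to `__crc32cd` on AArch64. A minimal, self-contained sketch of the same idea — the function name and the zero-padded tail handling are my own, not ClickHouse's (the real `CRC32Hash` instead reloads the last 8 bytes with an overlapping read):

```
#include <cstdint>
#include <cstring>
#include <string_view>

#if defined(__SSE4_2__)
#    include <nmmintrin.h>
#    define CRC_INT _mm_crc32_u64
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
#    include <arm_acle.h>
#    define CRC_INT __crc32cd
#endif

#ifdef CRC_INT
/// Hash a string by feeding it to the hardware CRC32 instruction in 8-byte words.
inline uint64_t crc32WordHash(std::string_view s)
{
    uint64_t res = -1ULL;
    const char * pos = s.data();
    const char * end = pos + s.size();

    while (end - pos >= 8)
    {
        uint64_t word;
        std::memcpy(&word, pos, 8); /// unaligned-safe load
        res = CRC_INT(res, word);
        pos += 8;
    }

    if (size_t tail = static_cast<size_t>(end - pos))
    {
        uint64_t word = 0;
        std::memcpy(&word, pos, tail); /// zero-padded tail instead of an overlapping load
        res = CRC_INT(res, word);
    }
    return res;
}
#endif
```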
@ -26,3 +26,27 @@ void insertAtEnd(std::vector<T> & dest, std::vector<T> && src)
    dest.insert(dest.end(), std::make_move_iterator(src.begin()), std::make_move_iterator(src.end()));
    src.clear();
}

template <typename Container>
void insertAtEnd(Container & dest, const Container & src)
{
    if (src.empty())
        return;

    dest.insert(dest.end(), src.begin(), src.end());
}

template <typename Container>
void insertAtEnd(Container & dest, Container && src)
{
    if (src.empty())
        return;
    if (dest.empty())
    {
        dest.swap(src);
        return;
    }

    dest.insert(dest.end(), std::make_move_iterator(src.begin()), std::make_move_iterator(src.end()));
    src.clear();
}
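These new generic overloads extend `insertAtEnd` beyond `std::vector`: the const-reference version copies, while the rvalue version either swaps the whole container into an empty destination or move-inserts element by element and clears the source. A hedged usage sketch (container types and values are illustrative only; the overloads are restated so the snippet compiles on its own):

```
#include <deque>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

/// Re-stated from the hunk above so the sketch is self-contained.
template <typename Container>
void insertAtEnd(Container & dest, const Container & src)
{
    if (src.empty())
        return;
    dest.insert(dest.end(), src.begin(), src.end());
}

template <typename Container>
void insertAtEnd(Container & dest, Container && src)
{
    if (src.empty())
        return;
    if (dest.empty())
    {
        dest.swap(src);
        return;
    }
    dest.insert(dest.end(), std::make_move_iterator(src.begin()), std::make_move_iterator(src.end()));
    src.clear();
}

int main()
{
    std::vector<std::string> dest;
    std::vector<std::string> src{"a", "b", "c"};

    insertAtEnd(dest, src);             /// copies, src stays intact
    insertAtEnd(dest, std::move(src));  /// dest is non-empty: elements are move-inserted, src is cleared

    std::deque<int> empty_dest;
    std::deque<int> numbers{1, 2, 3};
    insertAtEnd(empty_dest, std::move(numbers)); /// dest is empty: the containers are simply swapped

    std::cout << dest.size() << ' ' << empty_dest.size() << '\n'; /// prints "6 3"
}
```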
18 base/base/safeExit.cpp Normal file
@ -0,0 +1,18 @@
#if defined(OS_LINUX)
#    include <sys/syscall.h>
#endif
#include <unistd.h>
#include <base/safeExit.h>
#include <base/defines.h>

[[noreturn]] void safeExit(int code)
{
#if defined(THREAD_SANITIZER) && defined(OS_LINUX)
    /// Thread sanitizer tries to do something on exit that we don't need if we want to exit immediately,
    /// while connection handling threads are still running.
    (void)syscall(SYS_exit_group, code);
    __builtin_unreachable();
#else
    _exit(code);
#endif
}
4 base/base/safeExit.h Normal file
@ -0,0 +1,4 @@
#pragma once

/// _exit() with a workaround for TSan.
[[noreturn]] void safeExit(int code);
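`safeExit()` centralises the "exit immediately" path: under TSan on Linux it bypasses the usual exit machinery via the `exit_group` syscall, otherwise it falls back to `_exit()`. A hedged usage sketch — the signal-handler scenario below is illustrative, not taken from this commit:

```
#include <csignal>
#include <base/safeExit.h>   /// header added by this commit

/// Illustrative only: terminate the whole process from a signal handler
/// without running destructors or flushing streams.
static void onTerminateSignal(int sig)
{
    safeExit(128 + sig);
}

int main()
{
    std::signal(SIGTERM, onTerminateSignal);
    /// ... run the server loop ...
    safeExit(0);
}
```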
@ -18,7 +18,7 @@
#include "Common/config_version.h"
#include <Common/config.h>

#if USE_SENTRY
#if USE_SENTRY && !defined(KEEPER_STANDALONE_BUILD)

#    include <sentry.h>
#    include <stdio.h>
@ -1,5 +1,13 @@
|
||||
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
|
||||
add_headers_and_sources(loggers .)
|
||||
|
||||
# Standard version depends on DBMS and works with text log
|
||||
add_library(loggers ${loggers_sources} ${loggers_headers})
|
||||
target_compile_definitions(loggers PUBLIC WITH_TEXT_LOG=1)
|
||||
target_link_libraries(loggers PRIVATE dbms clickhouse_common_io)
|
||||
target_include_directories(loggers PUBLIC ..)
|
||||
|
||||
# Lightweight version doesn't work with textlog and also doesn't depend on DBMS
|
||||
add_library(loggers_no_text_log ${loggers_sources} ${loggers_headers})
|
||||
target_link_libraries(loggers_no_text_log PRIVATE clickhouse_common_io)
|
||||
target_include_directories(loggers PUBLIC ..)
|
||||
|
@ -9,7 +9,11 @@
|
||||
#include <Poco/ConsoleChannel.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/Net/RemoteSyslogChannel.h>
|
||||
#include <Interpreters/TextLog.h>
|
||||
|
||||
#ifdef WITH_TEXT_LOG
|
||||
#include <Interpreters/TextLog.h>
|
||||
#endif
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
@ -30,17 +34,21 @@ static std::string createDirectory(const std::string & file)
|
||||
return path;
|
||||
};
|
||||
|
||||
#ifdef WITH_TEXT_LOG
|
||||
void Loggers::setTextLog(std::shared_ptr<DB::TextLog> log, int max_priority)
|
||||
{
|
||||
text_log = log;
|
||||
text_log_max_priority = max_priority;
|
||||
}
|
||||
#endif
|
||||
|
||||
void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name)
|
||||
{
|
||||
#ifdef WITH_TEXT_LOG
|
||||
if (split)
|
||||
if (auto log = text_log.lock())
|
||||
split->addTextLog(log, text_log_max_priority);
|
||||
#endif
|
||||
|
||||
auto current_logger = config.getString("logger", "");
|
||||
if (config_logger == current_logger) //-V1051
|
||||
|
@ -7,10 +7,12 @@
|
||||
#include <Poco/Util/Application.h>
|
||||
#include "OwnSplitChannel.h"
|
||||
|
||||
#ifdef WITH_TEXT_LOG
|
||||
namespace DB
|
||||
{
|
||||
class TextLog;
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace Poco::Util
|
||||
{
|
||||
@ -27,7 +29,9 @@ public:
|
||||
/// Close log files. On next log write files will be reopened.
|
||||
void closeLogs(Poco::Logger & logger);
|
||||
|
||||
#ifdef WITH_TEXT_LOG
|
||||
void setTextLog(std::shared_ptr<DB::TextLog> log, int max_priority);
|
||||
#endif
|
||||
|
||||
private:
|
||||
Poco::AutoPtr<Poco::FileChannel> log_file;
|
||||
@ -37,8 +41,10 @@ private:
|
||||
/// Previous value of logger element in config. It is used to reinitialize loggers whenever the value changed.
|
||||
std::string config_logger;
|
||||
|
||||
#ifdef WITH_TEXT_LOG
|
||||
std::weak_ptr<DB::TextLog> text_log;
|
||||
int text_log_max_priority = -1;
|
||||
#endif
|
||||
|
||||
Poco::AutoPtr<DB::OwnSplitChannel> split;
|
||||
};
|
||||
|
@ -20,10 +20,13 @@ namespace DB
|
||||
{
|
||||
void OwnSplitChannel::log(const Poco::Message & msg)
|
||||
{
|
||||
|
||||
#ifdef WITH_TEXT_LOG
|
||||
auto logs_queue = CurrentThread::getInternalTextLogsQueue();
|
||||
|
||||
if (channels.empty() && (logs_queue == nullptr || msg.getPriority() > logs_queue->max_priority))
|
||||
return;
|
||||
#endif
|
||||
|
||||
if (auto * masker = SensitiveDataMasker::getInstance())
|
||||
{
|
||||
@ -86,6 +89,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg)
|
||||
channel.first->log(msg); // ordinary child
|
||||
}
|
||||
|
||||
#ifdef WITH_TEXT_LOG
|
||||
auto logs_queue = CurrentThread::getInternalTextLogsQueue();
|
||||
|
||||
/// Log to "TCP queue" if message is not too noisy
|
||||
@ -137,6 +141,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg)
|
||||
if (text_log_locked)
|
||||
text_log_locked->add(elem);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -145,12 +150,14 @@ void OwnSplitChannel::addChannel(Poco::AutoPtr<Poco::Channel> channel, const std
|
||||
channels.emplace(name, ExtendedChannelPtrPair(std::move(channel), dynamic_cast<ExtendedLogChannel *>(channel.get())));
|
||||
}
|
||||
|
||||
#ifdef WITH_TEXT_LOG
|
||||
void OwnSplitChannel::addTextLog(std::shared_ptr<DB::TextLog> log, int max_priority)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(text_log_mutex);
|
||||
text_log = log;
|
||||
text_log_max_priority.store(max_priority, std::memory_order_relaxed);
|
||||
}
|
||||
#endif
|
||||
|
||||
void OwnSplitChannel::setLevel(const std::string & name, int level)
|
||||
{
|
||||
|
@ -7,10 +7,12 @@
|
||||
#include <Poco/Channel.h>
|
||||
#include "ExtendedLogChannel.h"
|
||||
|
||||
#ifdef WITH_TEXT_LOG
|
||||
namespace DB
|
||||
{
|
||||
class TextLog;
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -25,7 +27,9 @@ public:
|
||||
/// Adds a child channel
|
||||
void addChannel(Poco::AutoPtr<Poco::Channel> channel, const std::string & name);
|
||||
|
||||
#ifdef WITH_TEXT_LOG
|
||||
void addTextLog(std::shared_ptr<DB::TextLog> log, int max_priority);
|
||||
#endif
|
||||
|
||||
void setLevel(const std::string & name, int level);
|
||||
|
||||
@ -40,8 +44,10 @@ private:

    std::mutex text_log_mutex;

#ifdef WITH_TEXT_LOG
    std::weak_ptr<DB::TextLog> text_log;
    std::atomic<int> text_log_max_priority = -1;
#endif
};

}
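The recurring pattern in these logger changes is to compile the TextLog plumbing out entirely when `WITH_TEXT_LOG` is not defined, so the lightweight `loggers_no_text_log` library carries no DBMS dependency. A minimal sketch of that conditional-compilation pattern — the class and member names are illustrative, not the actual ClickHouse classes:

```
#include <memory>

// #define WITH_TEXT_LOG 1   /// defined only for the full build (see target_compile_definitions above)

#ifdef WITH_TEXT_LOG
namespace DB { class TextLog; }   /// a forward declaration is enough for a weak_ptr member
#endif

class DemoLogger
{
public:
#ifdef WITH_TEXT_LOG
    void setTextLog(std::shared_ptr<DB::TextLog> log) { text_log = log; }
#endif

    void log(const char * /*msg*/)
    {
#ifdef WITH_TEXT_LOG
        if (auto locked = text_log.lock())
        {
            /// forward the message to the text log table
        }
#endif
        /// always write to the ordinary channels
    }

private:
#ifdef WITH_TEXT_LOG
    std::weak_ptr<DB::TextLog> text_log;
#endif
};
```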
@ -4,11 +4,12 @@
|
||||
import sys
|
||||
import json
|
||||
|
||||
|
||||
def parse_block(block=[], options=[]):
|
||||
|
||||
#print('block is here', block)
|
||||
#show_query = False
|
||||
#show_query = options.show_query
|
||||
# print('block is here', block)
|
||||
# show_query = False
|
||||
# show_query = options.show_query
|
||||
result = []
|
||||
query = block[0].strip()
|
||||
if len(block) > 4:
|
||||
@ -20,9 +21,9 @@ def parse_block(block=[], options=[]):
|
||||
timing2 = block[2].strip().split()[1]
|
||||
timing3 = block[3].strip().split()[1]
|
||||
if options.show_queries:
|
||||
result.append( query )
|
||||
result.append(query)
|
||||
if not options.show_first_timings:
|
||||
result += [ timing1 , timing2, timing3 ]
|
||||
result += [timing1, timing2, timing3]
|
||||
else:
|
||||
result.append(timing1)
|
||||
return result
|
||||
@ -37,12 +38,12 @@ def read_stats_file(options, fname):
|
||||
|
||||
for line in f.readlines():
|
||||
|
||||
if 'SELECT' in line:
|
||||
if "SELECT" in line:
|
||||
if len(block) > 1:
|
||||
result.append( parse_block(block, options) )
|
||||
block = [ line ]
|
||||
elif 'Time:' in line:
|
||||
block.append( line )
|
||||
result.append(parse_block(block, options))
|
||||
block = [line]
|
||||
elif "Time:" in line:
|
||||
block.append(line)
|
||||
|
||||
return result
|
||||
|
||||
@ -50,7 +51,7 @@ def read_stats_file(options, fname):
|
||||
def compare_stats_files(options, arguments):
|
||||
result = []
|
||||
file_output = []
|
||||
pyplot_colors = ['y', 'b', 'g', 'r']
|
||||
pyplot_colors = ["y", "b", "g", "r"]
|
||||
for fname in arguments[1:]:
|
||||
file_output.append((read_stats_file(options, fname)))
|
||||
if len(file_output[0]) > 0:
|
||||
@ -58,65 +59,92 @@ def compare_stats_files(options, arguments):
|
||||
for idx, data_set in enumerate(file_output):
|
||||
int_result = []
|
||||
for timing in data_set:
|
||||
int_result.append(float(timing[0])) #y values
|
||||
result.append([[x for x in range(0, len(int_result)) ], int_result,
|
||||
pyplot_colors[idx] + '^' ] )
|
||||
# result.append([x for x in range(1, len(int_result)) ]) #x values
|
||||
# result.append( pyplot_colors[idx] + '^' )
|
||||
int_result.append(float(timing[0])) # y values
|
||||
result.append(
|
||||
[
|
||||
[x for x in range(0, len(int_result))],
|
||||
int_result,
|
||||
pyplot_colors[idx] + "^",
|
||||
]
|
||||
)
|
||||
# result.append([x for x in range(1, len(int_result)) ]) #x values
|
||||
# result.append( pyplot_colors[idx] + '^' )
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def parse_args():
|
||||
from optparse import OptionParser
|
||||
parser = OptionParser(usage='usage: %prog [options] [result_file_path]..')
|
||||
parser.add_option("-q", "--show-queries", help="Show statements along with timings", action="store_true", dest="show_queries")
|
||||
parser.add_option("-f", "--show-first-timings", help="Show only first tries timings", action="store_true", dest="show_first_timings")
|
||||
parser.add_option("-c", "--compare-mode", help="Prepare output for pyplot comparing result files.", action="store", dest="compare_mode")
|
||||
|
||||
parser = OptionParser(usage="usage: %prog [options] [result_file_path]..")
|
||||
parser.add_option(
|
||||
"-q",
|
||||
"--show-queries",
|
||||
help="Show statements along with timings",
|
||||
action="store_true",
|
||||
dest="show_queries",
|
||||
)
|
||||
parser.add_option(
|
||||
"-f",
|
||||
"--show-first-timings",
|
||||
help="Show only first tries timings",
|
||||
action="store_true",
|
||||
dest="show_first_timings",
|
||||
)
|
||||
parser.add_option(
|
||||
"-c",
|
||||
"--compare-mode",
|
||||
help="Prepare output for pyplot comparing result files.",
|
||||
action="store",
|
||||
dest="compare_mode",
|
||||
)
|
||||
(options, arguments) = parser.parse_args(sys.argv)
|
||||
if len(arguments) < 2:
|
||||
parser.print_usage()
|
||||
sys.exit(1)
|
||||
return ( options, arguments )
|
||||
return (options, arguments)
|
||||
|
||||
|
||||
def gen_pyplot_code(options, arguments):
|
||||
result = ''
|
||||
result = ""
|
||||
data_sets = compare_stats_files(options, arguments)
|
||||
for idx, data_set in enumerate(data_sets, start=0):
|
||||
x_values, y_values, line_style = data_set
|
||||
result += '\nplt.plot('
|
||||
result += '%s, %s, \'%s\'' % ( x_values, y_values, line_style )
|
||||
result += ', label=\'%s try\')' % idx
|
||||
print('import matplotlib.pyplot as plt')
|
||||
result += "\nplt.plot("
|
||||
result += "%s, %s, '%s'" % (x_values, y_values, line_style)
|
||||
result += ", label='%s try')" % idx
|
||||
print("import matplotlib.pyplot as plt")
|
||||
print(result)
|
||||
print( 'plt.xlabel(\'Try number\')' )
|
||||
print( 'plt.ylabel(\'Timing\')' )
|
||||
print( 'plt.title(\'Benchmark query timings\')' )
|
||||
print('plt.legend()')
|
||||
print('plt.show()')
|
||||
print("plt.xlabel('Try number')")
|
||||
print("plt.ylabel('Timing')")
|
||||
print("plt.title('Benchmark query timings')")
|
||||
print("plt.legend()")
|
||||
print("plt.show()")
|
||||
|
||||
|
||||
def gen_html_json(options, arguments):
|
||||
tuples = read_stats_file(options, arguments[1])
|
||||
print('{')
|
||||
print("{")
|
||||
print('"system: GreenPlum(x2),')
|
||||
print(('"version": "%s",' % '4.3.9.1'))
|
||||
print(('"version": "%s",' % "4.3.9.1"))
|
||||
print('"data_size": 10000000,')
|
||||
print('"time": "",')
|
||||
print('"comments": "",')
|
||||
print('"result":')
|
||||
print('[')
|
||||
print("[")
|
||||
for s in tuples:
|
||||
print(s)
|
||||
print(']')
|
||||
print('}')
|
||||
print("]")
|
||||
print("}")
|
||||
|
||||
|
||||
def main():
|
||||
( options, arguments ) = parse_args()
|
||||
(options, arguments) = parse_args()
|
||||
if len(arguments) > 2:
|
||||
gen_pyplot_code(options, arguments)
|
||||
else:
|
||||
gen_html_json(options, arguments)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
@ -11,10 +11,6 @@ DATASET="${TABLE}_v1.tar.xz"
|
||||
QUERIES_FILE="queries.sql"
|
||||
TRIES=3
|
||||
|
||||
AMD64_BIN_URL="https://builds.clickhouse.com/master/amd64/clickhouse"
|
||||
AARCH64_BIN_URL="https://builds.clickhouse.com/master/aarch64/clickhouse"
|
||||
POWERPC64_BIN_URL="https://builds.clickhouse.com/master/ppc64le/clickhouse"
|
||||
|
||||
# Note: on older Ubuntu versions, 'axel' does not support IPv6. If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'.
|
||||
|
||||
FASTER_DOWNLOAD=wget
|
||||
@ -33,20 +29,60 @@ fi
|
||||
mkdir -p clickhouse-benchmark-$SCALE
|
||||
pushd clickhouse-benchmark-$SCALE
|
||||
|
||||
if [[ ! -f clickhouse ]]; then
|
||||
CPU=$(uname -m)
|
||||
if [[ ($CPU == x86_64) || ($CPU == amd64) ]]; then
|
||||
$FASTER_DOWNLOAD "$AMD64_BIN_URL"
|
||||
elif [[ $CPU == aarch64 ]]; then
|
||||
$FASTER_DOWNLOAD "$AARCH64_BIN_URL"
|
||||
elif [[ $CPU == powerpc64le ]]; then
|
||||
$FASTER_DOWNLOAD "$POWERPC64_BIN_URL"
|
||||
else
|
||||
echo "Unsupported CPU type: $CPU"
|
||||
exit 1
|
||||
OS=$(uname -s)
|
||||
ARCH=$(uname -m)
|
||||
|
||||
DIR=
|
||||
|
||||
if [ "${OS}" = "Linux" ]
|
||||
then
|
||||
if [ "${ARCH}" = "x86_64" ]
|
||||
then
|
||||
DIR="amd64"
|
||||
elif [ "${ARCH}" = "aarch64" ]
|
||||
then
|
||||
DIR="aarch64"
|
||||
elif [ "${ARCH}" = "powerpc64le" ]
|
||||
then
|
||||
DIR="powerpc64le"
|
||||
fi
|
||||
elif [ "${OS}" = "FreeBSD" ]
|
||||
then
|
||||
if [ "${ARCH}" = "x86_64" ]
|
||||
then
|
||||
DIR="freebsd"
|
||||
elif [ "${ARCH}" = "aarch64" ]
|
||||
then
|
||||
DIR="freebsd-aarch64"
|
||||
elif [ "${ARCH}" = "powerpc64le" ]
|
||||
then
|
||||
DIR="freebsd-powerpc64le"
|
||||
fi
|
||||
elif [ "${OS}" = "Darwin" ]
|
||||
then
|
||||
if [ "${ARCH}" = "x86_64" ]
|
||||
then
|
||||
DIR="macos"
|
||||
elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ]
|
||||
then
|
||||
DIR="macos-aarch64"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -z "${DIR}" ]
|
||||
then
|
||||
echo "The '${OS}' operating system with the '${ARCH}' architecture is not supported."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
|
||||
echo
|
||||
echo "Will download ${URL}"
|
||||
echo
|
||||
curl -O "${URL}" && chmod a+x clickhouse || exit 1
|
||||
echo
|
||||
echo "Successfully downloaded the ClickHouse binary"
|
||||
|
||||
chmod a+x clickhouse
|
||||
|
||||
if [[ ! -f $QUERIES_FILE ]]; then
|
||||
@ -88,7 +124,12 @@ echo
|
||||
|
||||
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
|
||||
sync
|
||||
if [ "${OS}" = "Darwin" ]
|
||||
then
|
||||
sudo purge > /dev/null
|
||||
else
|
||||
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
|
||||
fi
|
||||
|
||||
echo -n "["
|
||||
for i in $(seq 1 $TRIES); do
|
||||
@ -104,27 +145,45 @@ echo
|
||||
echo "Benchmark complete. System info:"
|
||||
echo
|
||||
|
||||
echo '----Version, build id-----------'
|
||||
./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())"
|
||||
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
|
||||
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
|
||||
echo '----CPU-------------------------'
|
||||
cat /proc/cpuinfo | grep -i -F 'model name' | uniq
|
||||
lscpu
|
||||
echo '----Block Devices---------------'
|
||||
lsblk
|
||||
echo '----Disk Free and Total--------'
|
||||
df -h .
|
||||
echo '----Memory Free and Total-------'
|
||||
free -h
|
||||
echo '----Physical Memory Amount------'
|
||||
cat /proc/meminfo | grep MemTotal
|
||||
echo '----RAID Info-------------------'
|
||||
cat /proc/mdstat
|
||||
#echo '----PCI-------------------------'
|
||||
#lspci
|
||||
#echo '----All Hardware Info-----------'
|
||||
#lshw
|
||||
echo '--------------------------------'
|
||||
|
||||
if [ "${OS}" = "Darwin" ]
|
||||
then
|
||||
echo '----Version, build id-----------'
|
||||
./clickhouse local --query "SELECT format('Version: {}', version())"
|
||||
sw_vers | grep BuildVersion
|
||||
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
|
||||
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
|
||||
echo '----CPU-------------------------'
|
||||
sysctl hw.model
|
||||
sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize'
|
||||
echo '----Disk Free and Total--------'
|
||||
df -h .
|
||||
echo '----Memory Free and Total-------'
|
||||
vm_stat
|
||||
echo '----Physical Memory Amount------'
|
||||
ls -l /var/vm
|
||||
echo '--------------------------------'
|
||||
else
|
||||
echo '----Version, build id-----------'
|
||||
./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())"
|
||||
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
|
||||
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
|
||||
echo '----CPU-------------------------'
|
||||
cat /proc/cpuinfo | grep -i -F 'model name' | uniq
|
||||
lscpu
|
||||
echo '----Block Devices---------------'
|
||||
lsblk
|
||||
echo '----Disk Free and Total--------'
|
||||
df -h .
|
||||
echo '----Memory Free and Total-------'
|
||||
free -h
|
||||
echo '----Physical Memory Amount------'
|
||||
cat /proc/meminfo | grep MemTotal
|
||||
echo '----RAID Info-------------------'
|
||||
cat /proc/mdstat
|
||||
#echo '----PCI-------------------------'
|
||||
#lspci
|
||||
#echo '----All Hardware Info-----------'
|
||||
#lshw
|
||||
echo '--------------------------------'
|
||||
fi
|
||||
echo
|
||||
|
@ -2,11 +2,11 @@
|
||||
|
||||
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
|
||||
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
|
||||
SET(VERSION_REVISION 54460)
|
||||
SET(VERSION_REVISION 54461)
|
||||
SET(VERSION_MAJOR 22)
|
||||
SET(VERSION_MINOR 3)
|
||||
SET(VERSION_MINOR 4)
|
||||
SET(VERSION_PATCH 1)
|
||||
SET(VERSION_GITHASH 75366fc95e510b7ac76759ef670702ae5f488a51)
|
||||
SET(VERSION_DESCRIBE v22.3.1.1-testing)
|
||||
SET(VERSION_STRING 22.3.1.1)
|
||||
SET(VERSION_GITHASH 92ab33f560e638d1989c5ca543021ab53d110f5c)
|
||||
SET(VERSION_DESCRIBE v22.4.1.1-testing)
|
||||
SET(VERSION_STRING 22.4.1.1)
|
||||
# end of autochange
|
||||
|
@ -55,5 +55,5 @@ endif ()
|
||||
if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS)
|
||||
message(STATUS
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}: Have ${AVAILABLE_PHYSICAL_MEMORY} megabytes of memory.
|
||||
Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS}")
|
||||
Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)")
|
||||
endif ()
|
||||
|
28 cmake/strip.sh Executable file
@ -0,0 +1,28 @@
#!/usr/bin/env bash

BINARY_PATH=$1
BINARY_NAME=$(basename "$BINARY_PATH")
DESTINATION_STRIPPED_DIR=$2
OBJCOPY_PATH=${3:-objcopy}
READELF_PATH=${4:-readelf}

BUILD_ID=$($READELF_PATH -n "$1" | sed -n '/Build ID/ { s/.*: //p; q; }')
BUILD_ID_PREFIX=${BUILD_ID:0:2}
BUILD_ID_SUFFIX=${BUILD_ID:2}

DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id"
DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"

mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX"
mkdir -p "$DESTINATION_STRIP_BINARY_DIR"


cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

$OBJCOPY_PATH --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chmod 0644 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chown 0:0 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"

strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
26
cmake/strip_binary.cmake
Normal file
@ -0,0 +1,26 @@
|
||||
macro(clickhouse_strip_binary)
|
||||
set(oneValueArgs TARGET DESTINATION_DIR BINARY_PATH)
|
||||
|
||||
cmake_parse_arguments(STRIP "" "${oneValueArgs}" "" ${ARGN})
|
||||
|
||||
if (NOT DEFINED STRIP_TARGET)
|
||||
message(FATAL_ERROR "A target name must be provided for stripping binary")
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED STRIP_BINARY_PATH)
|
||||
message(FATAL_ERROR "A binary path name must be provided for stripping binary")
|
||||
endif()
|
||||
|
||||
|
||||
if (NOT DEFINED STRIP_DESTINATION_DIR)
|
||||
message(FATAL_ERROR "Destination directory for stripped binary must be provided")
|
||||
endif()
|
||||
|
||||
add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD
|
||||
COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH}
|
||||
COMMENT "Stripping clickhouse binary" VERBATIM
|
||||
)
|
||||
|
||||
install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
|
||||
endmacro()
|
@ -169,3 +169,33 @@ if (OBJCOPY_PATH)
|
||||
else ()
|
||||
message (FATAL_ERROR "Cannot find objcopy.")
|
||||
endif ()
|
||||
|
||||
# Readelf (FIXME copypaste)
|
||||
|
||||
if (COMPILER_GCC)
|
||||
find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf")
|
||||
else ()
|
||||
find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf")
|
||||
endif ()
|
||||
|
||||
if (NOT READELF_PATH AND OS_DARWIN)
|
||||
find_program (BREW_PATH NAMES "brew")
|
||||
if (BREW_PATH)
|
||||
execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX)
|
||||
if (LLVM_PREFIX)
|
||||
find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
|
||||
endif ()
|
||||
if (NOT READELF_PATH)
|
||||
execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX)
|
||||
if (BINUTILS_PREFIX)
|
||||
find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (READELF_PATH)
|
||||
message (STATUS "Using readelf: ${READELF_PATH}")
|
||||
else ()
|
||||
message (FATAL_ERROR "Cannot find readelf.")
|
||||
endif ()
|
||||
|
2
contrib/icu
vendored
@ -1 +1 @@
|
||||
Subproject commit faa2f9f9e1fe74c5ed00eba371d2830134cdbea1
|
||||
Subproject commit a56dde820dc35665a66f2e9ee8ba58e75049b668
|
@ -212,7 +212,9 @@ set(ICUUC_SOURCES
|
||||
"${ICU_SOURCE_DIR}/common/ubiditransform.cpp"
|
||||
"${ICU_SOURCE_DIR}/common/pluralmap.cpp"
|
||||
"${ICU_SOURCE_DIR}/common/static_unicode_sets.cpp"
|
||||
"${ICU_SOURCE_DIR}/common/restrace.cpp")
|
||||
"${ICU_SOURCE_DIR}/common/restrace.cpp"
|
||||
"${ICU_SOURCE_DIR}/common/emojiprops.cpp"
|
||||
"${ICU_SOURCE_DIR}/common/lstmbe.cpp")
|
||||
|
||||
set(ICUI18N_SOURCES
|
||||
"${ICU_SOURCE_DIR}/i18n/ucln_in.cpp"
|
||||
@ -398,7 +400,6 @@ set(ICUI18N_SOURCES
|
||||
"${ICU_SOURCE_DIR}/i18n/sharedbreakiterator.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/scientificnumberformatter.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/dayperiodrules.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/nounit.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/number_affixutils.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/number_compact.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/number_decimalquantity.cpp"
|
||||
@ -446,12 +447,21 @@ set(ICUI18N_SOURCES
|
||||
"${ICU_SOURCE_DIR}/i18n/formattedvalue.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/formattedval_iterimpl.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/formattedval_sbimpl.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/formatted_string_builder.cpp")
|
||||
"${ICU_SOURCE_DIR}/i18n/formatted_string_builder.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/measunit_extra.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/number_symbolswrapper.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/number_usageprefs.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/numrange_capi.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/pluralranges.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/units_complexconverter.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/units_converter.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/units_data.cpp"
|
||||
"${ICU_SOURCE_DIR}/i18n/units_router.cpp")
|
||||
|
||||
file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ")
|
||||
enable_language(ASM)
|
||||
set(ICUDATA_SOURCES
|
||||
"${ICUDATA_SOURCE_DIR}/icudt66l_dat.S"
|
||||
"${ICUDATA_SOURCE_DIR}/icudt70l_dat.S"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" # Without this cmake can incorrectly detects library type (OBJECT) instead of SHARED/STATIC
|
||||
)
|
||||
|
||||
|
2
contrib/icudata
vendored
@ -1 +1 @@
|
||||
Subproject commit f020820388e3faafb44cc643574a2d563dfde572
|
||||
Subproject commit 72d9a4a7febc904e2b0a534ccb25ae40fac5f1e5
|
2
contrib/jemalloc
vendored
@ -1 +1 @@
|
||||
Subproject commit ca709c3139f77f4c00a903cdee46d71e9028f6c6
|
||||
Subproject commit 78b58379c854a639df79beb3289351129d863d4b
|
@ -117,8 +117,11 @@ if (OS_DARWIN)
|
||||
endif ()
|
||||
|
||||
add_library(_jemalloc ${SRCS})
|
||||
# First include jemalloc-cmake files, to override anything that jemalloc has.
|
||||
# (for example if you were trying to build jemalloc inside contrib/jemalloc you
|
||||
# will have some files that may be out of date)
|
||||
target_include_directories(_jemalloc PUBLIC include)
|
||||
target_include_directories(_jemalloc PRIVATE "${LIBRARY_DIR}/include")
|
||||
target_include_directories(_jemalloc SYSTEM PUBLIC include)
|
||||
|
||||
set (JEMALLOC_INCLUDE_PREFIX)
|
||||
# OS_
|
||||
|
@ -4,12 +4,21 @@
extern "C" {
#endif

#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wredundant-decls"
#endif

#include <jemalloc/jemalloc_defs.h>
#include <jemalloc/jemalloc_rename.h>
#include <jemalloc/jemalloc_macros.h>
#include <jemalloc/jemalloc_protos.h>
#include <jemalloc/jemalloc_typedefs.h>

#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif

#ifdef __cplusplus
}
#endif
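The change above wraps the generated jemalloc headers in a push/pop of GCC's `-Wredundant-decls` diagnostic so third-party declarations do not trip warnings in warning-clean builds. A generic sketch of the same technique — the header and the warning name below are placeholders, not taken from this commit:

```
/// Suppress a specific warning only around a third-party include,
/// then restore the previous diagnostic state.
#if !defined(__clang__)
#    pragma GCC diagnostic push
#    pragma GCC diagnostic ignored "-Wunused-parameter"   /// placeholder warning name
#endif

#include <vector>   /// stands in for a noisy third-party header

#if !defined(__clang__)
#    pragma GCC diagnostic pop
#endif

int main()
{
    std::vector<int> v{1, 2, 3};
    return static_cast<int>(v.size()) - 3;   /// returns 0
}
```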
@ -1,4 +1,8 @@
|
||||
if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined")
|
||||
# During cross-compilation in our CI we have to use llvm-tblgen and other build
# tools built for the host architecture, and everything else for the target architecture (e.g. AArch64).
# Possible workaround is to use llvm-tblgen from some package...
# But let's just enable LLVM for native builds
|
||||
if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined")
|
||||
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
|
||||
else()
|
||||
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
|
||||
@ -22,9 +26,6 @@ set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm")
|
||||
set (REQUIRED_LLVM_LIBRARIES
|
||||
LLVMExecutionEngine
|
||||
LLVMRuntimeDyld
|
||||
LLVMX86CodeGen
|
||||
LLVMX86Desc
|
||||
LLVMX86Info
|
||||
LLVMAsmPrinter
|
||||
LLVMDebugInfoDWARF
|
||||
LLVMGlobalISel
|
||||
@ -56,6 +57,12 @@ set (REQUIRED_LLVM_LIBRARIES
|
||||
LLVMDemangle
|
||||
)
|
||||
|
||||
if (ARCH_AMD64)
|
||||
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen)
|
||||
elseif (ARCH_AARCH64)
|
||||
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen)
|
||||
endif ()
|
||||
|
||||
#function(llvm_libs_all REQUIRED_LLVM_LIBRARIES)
|
||||
# llvm_map_components_to_libnames (result all)
|
||||
# if (USE_STATIC_LIBRARIES OR NOT "LLVM" IN_LIST result)
|
||||
|
@ -3,25 +3,25 @@ compilers and build settings. Correctly configured Docker daemon is single depen
|
||||
|
||||
Usage:
|
||||
|
||||
Build deb package with `clang-11` in `debug` mode:
|
||||
Build deb package with `clang-14` in `debug` mode:
|
||||
```
|
||||
$ mkdir deb/test_output
|
||||
$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-11 --build-type=debug
|
||||
$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --build-type=debug
|
||||
$ ls -l deb/test_output
|
||||
-rw-r--r-- 1 root root 3730 clickhouse-client_18.14.2+debug_all.deb
|
||||
-rw-r--r-- 1 root root 84221888 clickhouse-common-static_18.14.2+debug_amd64.deb
|
||||
-rw-r--r-- 1 root root 255967314 clickhouse-common-static-dbg_18.14.2+debug_amd64.deb
|
||||
-rw-r--r-- 1 root root 14940 clickhouse-server_18.14.2+debug_all.deb
|
||||
-rw-r--r-- 1 root root 340206010 clickhouse-server-base_18.14.2+debug_amd64.deb
|
||||
-rw-r--r-- 1 root root 7900 clickhouse-server-common_18.14.2+debug_all.deb
|
||||
-rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb
|
||||
-rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb
|
||||
-rw-r--r-- 1 root root 255967314 clickhouse-common-static-dbg_22.2.2+debug_amd64.deb
|
||||
-rw-r--r-- 1 root root 14940 clickhouse-server_22.2.2+debug_all.deb
|
||||
-rw-r--r-- 1 root root 340206010 clickhouse-server-base_22.2.2+debug_amd64.deb
|
||||
-rw-r--r-- 1 root root 7900 clickhouse-server-common_22.2.2+debug_all.deb
|
||||
|
||||
```
|
||||
|
||||
Build ClickHouse binary with `clang-11` and `address` sanitizer in `relwithdebuginfo`
|
||||
Build ClickHouse binary with `clang-14` and `address` sanitizer in `relwithdebuginfo`
|
||||
mode:
|
||||
```
|
||||
$ mkdir $HOME/some_clickhouse
|
||||
$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-11 --sanitizer=address
|
||||
$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-14 --sanitizer=address
|
||||
$ ls -l $HOME/some_clickhouse
|
||||
-rwxr-xr-x 1 root root 787061952 clickhouse
|
||||
lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse
|
||||
|
@ -95,6 +95,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
|
||||
&& apt-get install gcc-11 g++-11 --yes \
|
||||
&& apt-get clean
|
||||
|
||||
# Architecture of the image when BuildKit/buildx is used
|
||||
ARG TARGETARCH
|
||||
ARG NFPM_VERSION=2.15.0
|
||||
|
||||
RUN arch=${TARGETARCH:-amd64} \
|
||||
&& curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \
|
||||
&& dpkg -i /tmp/nfpm.deb \
|
||||
&& rm /tmp/nfpm.deb
|
||||
|
||||
COPY build.sh /
|
||||
CMD ["bash", "-c", "/build.sh 2>&1 | ts"]
|
||||
CMD ["bash", "-c", "/build.sh 2>&1"]
|
||||
|
@ -1,7 +1,13 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
exec &> >(ts)
|
||||
set -x -e
|
||||
|
||||
cache_status () {
|
||||
ccache --show-config ||:
|
||||
ccache --show-stats ||:
|
||||
}
|
||||
|
||||
mkdir -p build/cmake/toolchain/darwin-x86_64
|
||||
tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
|
||||
ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64
|
||||
@ -19,15 +25,23 @@ read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
|
||||
env
|
||||
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
|
||||
|
||||
ccache --show-config ||:
|
||||
ccache --show-stats ||:
|
||||
cache_status
|
||||
# clear cache stats
|
||||
ccache --zero-stats ||:
|
||||
|
||||
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
|
||||
# No quotes because I want it to expand to nothing if empty.
|
||||
# shellcheck disable=SC2086
|
||||
ninja $NINJA_FLAGS clickhouse-bundle
|
||||
|
||||
ccache --show-config ||:
|
||||
ccache --show-stats ||:
|
||||
cache_status
|
||||
|
||||
if [ -n "$MAKE_DEB" ]; then
|
||||
rm -rf /build/packages/root
|
||||
# No quotes because I want it to expand to nothing if empty.
|
||||
# shellcheck disable=SC2086
|
||||
DESTDIR=/build/packages/root ninja $NINJA_FLAGS install
|
||||
bash -x /build/packages/build
|
||||
fi
|
||||
|
||||
mv ./programs/clickhouse* /output
|
||||
mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
|
||||
@ -84,8 +98,7 @@ fi
|
||||
# ../docker/packager/other/fuzzer.sh
|
||||
# fi
|
||||
|
||||
ccache --show-config ||:
|
||||
ccache --show-stats ||:
|
||||
cache_status
|
||||
|
||||
if [ "${CCACHE_DEBUG:-}" == "1" ]
|
||||
then
|
||||
|
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
#-*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
import subprocess
|
||||
import os
|
||||
import argparse
|
||||
@ -8,36 +8,39 @@ import sys
|
||||
|
||||
SCRIPT_PATH = os.path.realpath(__file__)
|
||||
|
||||
IMAGE_MAP = {
|
||||
"deb": "clickhouse/deb-builder",
|
||||
"binary": "clickhouse/binary-builder",
|
||||
}
|
||||
|
||||
def check_image_exists_locally(image_name):
|
||||
try:
|
||||
output = subprocess.check_output("docker images -q {} 2> /dev/null".format(image_name), shell=True)
|
||||
output = subprocess.check_output(
|
||||
f"docker images -q {image_name} 2> /dev/null", shell=True
|
||||
)
|
||||
return output != ""
|
||||
except subprocess.CalledProcessError as ex:
|
||||
except subprocess.CalledProcessError:
|
||||
return False
|
||||
|
||||
|
||||
def pull_image(image_name):
|
||||
try:
|
||||
subprocess.check_call("docker pull {}".format(image_name), shell=True)
|
||||
subprocess.check_call(f"docker pull {image_name}", shell=True)
|
||||
return True
|
||||
except subprocess.CalledProcessError as ex:
|
||||
logging.info("Cannot pull image {}".format(image_name))
|
||||
except subprocess.CalledProcessError:
|
||||
logging.info(f"Cannot pull image {image_name}".format())
|
||||
return False
|
||||
|
||||
|
||||
def build_image(image_name, filepath):
|
||||
context = os.path.dirname(filepath)
|
||||
build_cmd = "docker build --network=host -t {} -f {} {}".format(image_name, filepath, context)
|
||||
logging.info("Will build image with cmd: '{}'".format(build_cmd))
|
||||
build_cmd = f"docker build --network=host -t {image_name} -f {filepath} {context}"
|
||||
logging.info("Will build image with cmd: '%s'", build_cmd)
|
||||
subprocess.check_call(
|
||||
build_cmd,
|
||||
shell=True,
|
||||
)
|
||||
|
||||
def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir, docker_image_version):
|
||||
|
||||
def run_docker_image_with_env(
|
||||
image_name, output, env_variables, ch_root, ccache_dir, docker_image_version
|
||||
):
|
||||
env_part = " -e ".join(env_variables)
|
||||
if env_part:
|
||||
env_part = " -e " + env_part
|
||||
@ -47,28 +50,52 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache
|
||||
else:
|
||||
interactive = ""
|
||||
|
||||
cmd = "docker run --network=host --rm --volume={output_path}:/output --volume={ch_root}:/build --volume={ccache_dir}:/ccache {env} {interactive} {img_name}".format(
|
||||
output_path=output,
|
||||
ch_root=ch_root,
|
||||
ccache_dir=ccache_dir,
|
||||
env=env_part,
|
||||
img_name=image_name + ":" + docker_image_version,
|
||||
interactive=interactive
|
||||
cmd = (
|
||||
f"docker run --network=host --rm --volume={output}:/output "
|
||||
f"--volume={ch_root}:/build --volume={ccache_dir}:/ccache {env_part} "
|
||||
f"{interactive} {image_name}:{docker_image_version}"
|
||||
)
|
||||
|
||||
logging.info("Will build ClickHouse pkg with cmd: '{}'".format(cmd))
|
||||
logging.info("Will build ClickHouse pkg with cmd: '%s'", cmd)
|
||||
|
||||
subprocess.check_call(cmd, shell=True)
|
||||
|
||||
def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries):
|
||||
|
||||
def is_release_build(build_type, package_type, sanitizer, split_binary):
|
||||
return (
|
||||
build_type == ""
|
||||
and package_type == "deb"
|
||||
and sanitizer == ""
|
||||
and not split_binary
|
||||
)
|
||||
|
||||
|
||||
def parse_env_variables(
|
||||
build_type,
|
||||
compiler,
|
||||
sanitizer,
|
||||
package_type,
|
||||
image_type,
|
||||
cache,
|
||||
distcc_hosts,
|
||||
split_binary,
|
||||
clang_tidy,
|
||||
version,
|
||||
author,
|
||||
official,
|
||||
additional_pkgs,
|
||||
with_coverage,
|
||||
with_binaries,
|
||||
):
|
||||
DARWIN_SUFFIX = "-darwin"
|
||||
DARWIN_ARM_SUFFIX = "-darwin-aarch64"
|
||||
ARM_SUFFIX = "-aarch64"
|
||||
FREEBSD_SUFFIX = "-freebsd"
|
||||
PPC_SUFFIX = '-ppc64le'
|
||||
PPC_SUFFIX = "-ppc64le"
|
||||
|
||||
result = []
|
||||
cmake_flags = ['$CMAKE_FLAGS']
|
||||
result.append("OUTPUT_DIR=/output")
|
||||
cmake_flags = ["$CMAKE_FLAGS"]
|
||||
|
||||
is_cross_darwin = compiler.endswith(DARWIN_SUFFIX)
|
||||
is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX)
|
||||
@ -77,46 +104,72 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
|
||||
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
|
||||
|
||||
if is_cross_darwin:
|
||||
cc = compiler[:-len(DARWIN_SUFFIX)]
|
||||
cc = compiler[: -len(DARWIN_SUFFIX)]
|
||||
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/x86_64-apple-darwin-ar")
|
||||
cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/x86_64-apple-darwin-install_name_tool")
|
||||
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib")
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/"
|
||||
"x86_64-apple-darwin-install_name_tool"
|
||||
)
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib"
|
||||
)
|
||||
cmake_flags.append("-DLINKER_NAME=/cctools/bin/x86_64-apple-darwin-ld")
|
||||
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake")
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake"
|
||||
)
|
||||
elif is_cross_darwin_arm:
|
||||
cc = compiler[:-len(DARWIN_ARM_SUFFIX)]
|
||||
cc = compiler[: -len(DARWIN_ARM_SUFFIX)]
|
||||
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar")
|
||||
cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/aarch64-apple-darwin-install_name_tool")
|
||||
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib")
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/"
|
||||
"aarch64-apple-darwin-install_name_tool"
|
||||
)
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib"
|
||||
)
|
||||
cmake_flags.append("-DLINKER_NAME=/cctools/bin/aarch64-apple-darwin-ld")
|
||||
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake")
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake"
|
||||
)
|
||||
elif is_cross_arm:
|
||||
cc = compiler[:-len(ARM_SUFFIX)]
|
||||
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake")
|
||||
result.append("DEB_ARCH_FLAG=-aarm64")
|
||||
cc = compiler[: -len(ARM_SUFFIX)]
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake"
|
||||
)
|
||||
result.append("DEB_ARCH=arm64")
|
||||
elif is_cross_freebsd:
|
||||
cc = compiler[:-len(FREEBSD_SUFFIX)]
|
||||
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake")
|
||||
cc = compiler[: -len(FREEBSD_SUFFIX)]
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake"
|
||||
)
|
||||
elif is_cross_ppc:
|
||||
cc = compiler[:-len(PPC_SUFFIX)]
|
||||
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake")
|
||||
cc = compiler[: -len(PPC_SUFFIX)]
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
|
||||
)
|
||||
else:
|
||||
cc = compiler
|
||||
result.append("DEB_ARCH_FLAG=-aamd64")
|
||||
result.append("DEB_ARCH=amd64")
|
||||
|
||||
cxx = cc.replace('gcc', 'g++').replace('clang', 'clang++')
|
||||
cxx = cc.replace("gcc", "g++").replace("clang", "clang++")
|
||||
|
||||
if image_type == "deb":
|
||||
result.append("DEB_CC={}".format(cc))
|
||||
result.append("DEB_CXX={}".format(cxx))
|
||||
# For building fuzzers
|
||||
result.append("CC={}".format(cc))
|
||||
result.append("CXX={}".format(cxx))
|
||||
elif image_type == "binary":
|
||||
result.append("CC={}".format(cc))
|
||||
result.append("CXX={}".format(cxx))
|
||||
cmake_flags.append('-DCMAKE_C_COMPILER=`which {}`'.format(cc))
|
||||
cmake_flags.append('-DCMAKE_CXX_COMPILER=`which {}`'.format(cxx))
|
||||
result.append("MAKE_DEB=true")
|
||||
cmake_flags.append("-DENABLE_TESTS=0")
|
||||
cmake_flags.append("-DENABLE_UTILS=0")
|
||||
cmake_flags.append("-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON")
|
||||
cmake_flags.append("-DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON")
|
||||
cmake_flags.append("-DCMAKE_AUTOGEN_VERBOSE=ON")
|
||||
cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr")
|
||||
cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
|
||||
cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
|
||||
if is_release_build(build_type, package_type, sanitizer, split_binary):
|
||||
cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON")
|
||||
|
||||
result.append(f"CC={cc}")
|
||||
result.append(f"CXX={cxx}")
|
||||
cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}")
|
||||
cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}")
|
||||
|
||||
# Create combined output archive for split build and for performance tests.
|
||||
if package_type == "performance":
|
||||
@ -126,12 +179,14 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
|
||||
result.append("COMBINED_OUTPUT=shared_build")
|
||||
|
||||
if sanitizer:
|
||||
result.append("SANITIZER={}".format(sanitizer))
|
||||
result.append(f"SANITIZER={sanitizer}")
|
||||
if build_type:
|
||||
result.append("BUILD_TYPE={}".format(build_type))
|
||||
result.append(f"BUILD_TYPE={build_type.capitalize()}")
|
||||
else:
|
||||
result.append("BUILD_TYPE=None")
|
||||
|
||||
if cache == 'distcc':
|
||||
result.append("CCACHE_PREFIX={}".format(cache))
|
||||
if cache == "distcc":
|
||||
result.append(f"CCACHE_PREFIX={cache}")
|
||||
|
||||
if cache:
|
||||
result.append("CCACHE_DIR=/ccache")
|
||||
@ -142,109 +197,188 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
|
||||
# result.append("CCACHE_UMASK=777")
|
||||
|
||||
if distcc_hosts:
|
||||
hosts_with_params = ["{}/24,lzo".format(host) for host in distcc_hosts] + ["localhost/`nproc`"]
|
||||
result.append('DISTCC_HOSTS="{}"'.format(" ".join(hosts_with_params)))
|
||||
hosts_with_params = [f"{host}/24,lzo" for host in distcc_hosts] + [
|
||||
"localhost/`nproc`"
|
||||
]
|
||||
result.append('DISTCC_HOSTS="' + " ".join(hosts_with_params) + '"')
|
||||
elif cache == "distcc":
|
||||
result.append('DISTCC_HOSTS="{}"'.format("localhost/`nproc`"))
|
||||
result.append('DISTCC_HOSTS="localhost/`nproc`"')
|
||||
|
||||
if alien_pkgs:
|
||||
result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'")
|
||||
if additional_pkgs:
|
||||
result.append("MAKE_APK=true")
|
||||
result.append("MAKE_RPM=true")
|
||||
result.append("MAKE_TGZ=true")
|
||||
|
||||
if with_binaries == "programs":
|
||||
result.append('BINARY_OUTPUT=programs')
|
||||
result.append("BINARY_OUTPUT=programs")
|
||||
elif with_binaries == "tests":
|
||||
result.append('ENABLE_TESTS=1')
|
||||
result.append('BINARY_OUTPUT=tests')
|
||||
cmake_flags.append('-DENABLE_TESTS=1')
|
||||
result.append("ENABLE_TESTS=1")
|
||||
result.append("BINARY_OUTPUT=tests")
|
||||
cmake_flags.append("-DENABLE_TESTS=1")
|
||||
|
||||
if split_binary:
|
||||
cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1')
|
||||
cmake_flags.append(
|
||||
"-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 "
|
||||
"-DCLICKHOUSE_SPLIT_BINARY=1"
|
||||
)
|
||||
# We can't always build utils because it requires too much space, but
|
||||
# we have to build them at least in some way in CI. The split build is
|
||||
# probably the least heavy disk-wise.
|
||||
cmake_flags.append('-DENABLE_UTILS=1')
|
||||
cmake_flags.append("-DENABLE_UTILS=1")
|
||||
|
||||
if clang_tidy:
|
||||
cmake_flags.append('-DENABLE_CLANG_TIDY=1')
|
||||
cmake_flags.append('-DENABLE_UTILS=1')
|
||||
cmake_flags.append('-DENABLE_TESTS=1')
|
||||
cmake_flags.append('-DENABLE_EXAMPLES=1')
|
||||
cmake_flags.append("-DENABLE_CLANG_TIDY=1")
|
||||
cmake_flags.append("-DENABLE_UTILS=1")
|
||||
cmake_flags.append("-DENABLE_TESTS=1")
|
||||
cmake_flags.append("-DENABLE_EXAMPLES=1")
|
||||
# Don't stop on first error to find more clang-tidy errors in one run.
|
||||
result.append('NINJA_FLAGS=-k0')
|
||||
result.append("NINJA_FLAGS=-k0")
|
||||
|
||||
if with_coverage:
|
||||
cmake_flags.append('-DWITH_COVERAGE=1')
|
||||
cmake_flags.append("-DWITH_COVERAGE=1")
|
||||
|
||||
if version:
|
||||
result.append("VERSION_STRING='{}'".format(version))
|
||||
result.append(f"VERSION_STRING='{version}'")
|
||||
|
||||
if author:
|
||||
result.append("AUTHOR='{}'".format(author))
|
||||
result.append(f"AUTHOR='{author}'")
|
||||
|
||||
if official:
|
||||
cmake_flags.append('-DYANDEX_OFFICIAL_BUILD=1')
|
||||
cmake_flags.append("-DYANDEX_OFFICIAL_BUILD=1")
|
||||
|
||||
result.append('CMAKE_FLAGS="' + ' '.join(cmake_flags) + '"')
|
||||
result.append('CMAKE_FLAGS="' + " ".join(cmake_flags) + '"')
|
||||
|
||||
return result
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse building script using prebuilt Docker image")
|
||||
# 'performance' creates a combined .tgz with server and configs to be used for performance test.
|
||||
parser.add_argument("--package-type", choices=['deb', 'binary', 'performance'], required=True)
|
||||
parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
description="ClickHouse building script using prebuilt Docker image",
|
||||
)
|
||||
# 'performance' creates a combined .tgz with server
|
||||
# and configs to be used for performance test.
|
||||
parser.add_argument(
|
||||
"--package-type",
|
||||
choices=("deb", "binary", "performance"),
|
||||
required=True,
|
||||
help="a build type",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--clickhouse-repo-path",
|
||||
default=os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir
|
||||
),
|
||||
help="ClickHouse git repository",
|
||||
)
|
||||
parser.add_argument("--output-dir", required=True)
|
||||
parser.add_argument("--build-type", choices=("debug", ""), default="")
|
||||
parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64",
|
||||
"clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64",
|
||||
"clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64", "clang-13-ppc64le",
|
||||
"clang-11-freebsd", "clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13")
|
||||
parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
|
||||
parser.add_argument(
|
||||
"--compiler",
|
||||
choices=(
|
||||
"clang-11",
|
||||
"clang-11-darwin",
|
||||
"clang-11-darwin-aarch64",
|
||||
"clang-11-aarch64",
|
||||
"clang-12",
|
||||
"clang-12-darwin",
|
||||
"clang-12-darwin-aarch64",
|
||||
"clang-12-aarch64",
|
||||
"clang-13",
|
||||
"clang-13-darwin",
|
||||
"clang-13-darwin-aarch64",
|
||||
"clang-13-aarch64",
|
||||
"clang-13-ppc64le",
|
||||
"clang-11-freebsd",
|
||||
"clang-12-freebsd",
|
||||
"clang-13-freebsd",
|
||||
"gcc-11",
|
||||
),
|
||||
default="clang-13",
|
||||
help="a compiler to use",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--sanitizer",
|
||||
choices=("address", "thread", "memory", "undefined", ""),
|
||||
default="",
|
||||
)
|
||||
parser.add_argument("--split-binary", action="store_true")
|
||||
parser.add_argument("--clang-tidy", action="store_true")
|
||||
parser.add_argument("--cache", choices=("", "ccache", "distcc"), default="")
|
||||
parser.add_argument("--ccache_dir", default= os.getenv("HOME", "") + '/.ccache')
|
||||
parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="")
|
||||
parser.add_argument(
|
||||
"--ccache_dir",
|
||||
default=os.getenv("HOME", "") + "/.ccache",
|
||||
help="a directory with ccache",
|
||||
)
|
||||
parser.add_argument("--distcc-hosts", nargs="+")
|
||||
parser.add_argument("--force-build-image", action="store_true")
|
||||
parser.add_argument("--version")
|
||||
parser.add_argument("--author", default="clickhouse")
|
||||
parser.add_argument("--author", default="clickhouse", help="a package author")
|
||||
parser.add_argument("--official", action="store_true")
|
||||
parser.add_argument("--alien-pkgs", nargs='+', default=[])
|
||||
parser.add_argument("--additional-pkgs", action="store_true")
|
||||
parser.add_argument("--with-coverage", action="store_true")
|
||||
parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="")
|
||||
parser.add_argument("--docker-image-version", default="latest")
|
||||
parser.add_argument(
|
||||
"--with-binaries", choices=("programs", "tests", ""), default=""
|
||||
)
|
||||
parser.add_argument(
|
||||
"--docker-image-version", default="latest", help="docker image tag to use"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
if not os.path.isabs(args.output_dir):
|
||||
args.output_dir = os.path.abspath(os.path.join(os.getcwd(), args.output_dir))
|
||||
|
||||
image_type = 'binary' if args.package_type == 'performance' else args.package_type
|
||||
image_name = IMAGE_MAP[image_type]
|
||||
image_type = "binary" if args.package_type == "performance" else args.package_type
|
||||
image_name = "clickhouse/binary-builder"
|
||||
|
||||
if not os.path.isabs(args.clickhouse_repo_path):
|
||||
ch_root = os.path.abspath(os.path.join(os.getcwd(), args.clickhouse_repo_path))
|
||||
else:
|
||||
ch_root = args.clickhouse_repo_path
|
||||
|
||||
if args.alien_pkgs and not image_type == "deb":
|
||||
raise Exception("Can add alien packages only in deb build")
|
||||
if args.additional_pkgs and image_type != "deb":
|
||||
raise Exception("Can build additional packages only in deb build")
|
||||
|
||||
if args.with_binaries != "" and not image_type == "deb":
|
||||
if args.with_binaries != "" and image_type != "deb":
|
||||
raise Exception("Can add additional binaries only in deb build")
|
||||
|
||||
if args.with_binaries != "" and image_type == "deb":
|
||||
logging.info("Should place {} to output".format(args.with_binaries))
|
||||
logging.info("Should place %s to output", args.with_binaries)
|
||||
|
||||
dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile")
|
||||
image_with_version = image_name + ":" + args.docker_image_version
|
||||
if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image:
|
||||
if (
|
||||
image_type != "freebsd"
|
||||
and not check_image_exists_locally(image_name)
|
||||
or args.force_build_image
|
||||
):
|
||||
if not pull_image(image_with_version) or args.force_build_image:
|
||||
build_image(image_with_version, dockerfile)
|
||||
env_prepared = parse_env_variables(
|
||||
args.build_type, args.compiler, args.sanitizer, args.package_type, image_type,
|
||||
args.cache, args.distcc_hosts, args.split_binary, args.clang_tidy,
|
||||
args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries)
|
||||
args.build_type,
|
||||
args.compiler,
|
||||
args.sanitizer,
|
||||
args.package_type,
|
||||
image_type,
|
||||
args.cache,
|
||||
args.distcc_hosts,
|
||||
args.split_binary,
|
||||
args.clang_tidy,
|
||||
args.version,
|
||||
args.author,
|
||||
args.official,
|
||||
args.additional_pkgs,
|
||||
args.with_coverage,
|
||||
args.with_binaries,
|
||||
)
|
||||
|
||||
run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir, args.docker_image_version)
|
||||
logging.info("Output placed into {}".format(args.output_dir))
|
||||
run_docker_image_with_env(
|
||||
image_name,
|
||||
args.output_dir,
|
||||
env_prepared,
|
||||
ch_root,
|
||||
args.ccache_dir,
|
||||
args.docker_image_version,
|
||||
)
|
||||
logging.info("Output placed into %s", args.output_dir)
|
||||
|
@ -4,7 +4,7 @@ FROM ubuntu:20.04
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
ARG repository="deb https://packages.clickhouse.com/deb stable main"
ARG version=22.1.1.*

# set non-empty deb_location_url url to create a docker image
@ -58,7 +58,7 @@ RUN groupadd -r clickhouse --gid=101 \
wget \
tzdata \
&& mkdir -p /etc/apt/sources.list.d \
&& apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \
&& apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \
&& echo $repository > /etc/apt/sources.list.d/clickhouse.list \
&& if [ -n "$deb_location_url" ]; then \
echo "installing from custom url with deb packages: $deb_location_url" \

@ -263,9 +263,20 @@ function run_tests
if [[ $NPROC == 0 ]]; then
NPROC=1
fi
time clickhouse-test --hung-check -j "${NPROC}" --order=random \
--fast-tests-only --no-long --testname --shard --zookeeper --check-zookeeper-session \
-- "$FASTTEST_FOCUS" 2>&1 \

local test_opts=(
--hung-check
--fast-tests-only
--no-long
--testname
--shard
--zookeeper
--check-zookeeper-session
--order random
--print-time
--jobs "${NPROC}"
)
time clickhouse-test "${test_opts[@]}" -- "$FASTTEST_FOCUS" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee "$FASTTEST_OUTPUT/test_result.txt"
set -e

@ -11,7 +11,7 @@ def removesuffix(text, suffix):
https://www.python.org/dev/peps/pep-0616/
"""
if suffix and text.endswith(suffix):
return text[:-len(suffix)]
return text[: -len(suffix)]
else:
return text[:]

@ -60,5 +60,5 @@ clientPort=2181 \n\
maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg
RUN mkdir /zookeeper && chmod -R 777 /zookeeper

ENV TZ=Europe/Moscow
ENV TZ=Etc/UTC
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

@ -42,6 +42,9 @@ COPY prepare_hive_data.sh /
COPY demo_data.txt /

ENV PATH=/apache-hive-2.3.9-bin/bin:/hadoop-3.1.0/bin:/hadoop-3.1.0/sbin:$PATH

RUN service ssh start && sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml && hdfs namenode -format
RUN apt install -y python3 python3-pip
RUN pip3 install flask requests
COPY http_api_server.py /
COPY start.sh /

73
docker/test/integration/hive_server/http_api_server.py
Normal file
@ -0,0 +1,73 @@
|
||||
import os
|
||||
import subprocess
|
||||
import datetime
|
||||
from flask import Flask, flash, request, redirect, url_for
|
||||
|
||||
|
||||
def run_command(command, wait=False):
|
||||
print("{} - execute shell command:{}".format(datetime.datetime.now(), command))
|
||||
lines = []
|
||||
p = subprocess.Popen(
|
||||
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True
|
||||
)
|
||||
if wait:
|
||||
for l in iter(p.stdout.readline, b""):
|
||||
lines.append(l)
|
||||
p.poll()
|
||||
return (lines, p.returncode)
|
||||
else:
|
||||
return (iter(p.stdout.readline, b""), 0)
|
||||
|
||||
|
||||
UPLOAD_FOLDER = "./"
|
||||
ALLOWED_EXTENSIONS = {"txt", "sh"}
|
||||
app = Flask(__name__)
|
||||
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
|
||||
|
||||
|
||||
@app.route("/")
|
||||
def hello_world():
|
||||
return "Hello World"
|
||||
|
||||
|
||||
def allowed_file(filename):
|
||||
return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
|
||||
|
||||
|
||||
@app.route("/upload", methods=["GET", "POST"])
|
||||
def upload_file():
|
||||
if request.method == "POST":
|
||||
# check if the post request has the file part
|
||||
if "file" not in request.files:
|
||||
flash("No file part")
|
||||
return redirect(request.url)
|
||||
file = request.files["file"]
|
||||
# If the user does not select a file, the browser submits an
|
||||
# empty file without a filename.
|
||||
if file.filename == "":
|
||||
flash("No selected file")
|
||||
return redirect(request.url)
|
||||
if file and allowed_file(file.filename):
|
||||
filename = file.filename
|
||||
file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename))
|
||||
return redirect(url_for("upload_file", name=filename))
|
||||
return """
|
||||
<!doctype html>
|
||||
<title>Upload new File</title>
|
||||
<h1>Upload new File</h1>
|
||||
<form method=post enctype=multipart/form-data>
|
||||
<input type=file name=file>
|
||||
<input type=submit value=Upload>
|
||||
</form>
|
||||
"""
|
||||
|
||||
|
||||
@app.route("/run", methods=["GET", "POST"])
|
||||
def parse_request():
|
||||
data = request.data # data is empty
|
||||
run_command(data, wait=True)
|
||||
return "Ok"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(port=5011)
|
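start.sh (below) now launches this Flask server in the foreground, so integration tests can drive the Hive container over HTTP instead of baking every step into the image. A hedged usage sketch with the requests package installed by the Dockerfile; the hostname and the uploaded file name are illustrative, and port 5011 comes from app.run above:

    import requests

    # Execute a shell command inside the container through the /run endpoint.
    resp = requests.post("http://localhost:5011/run", data="bash /prepare_hive_data.sh")
    print(resp.status_code, resp.text)  # expect: 200 Ok

    # Push an extra helper script through the /upload form endpoint.
    with open("extra_setup.sh", "rb") as f:  # hypothetical local file
        requests.post("http://localhost:5011/upload", files={"file": f})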
@ -2,5 +2,9 @@
hive -e "create database test"

hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; "
hive -e "create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'"
hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text "
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text"
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text"

hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;"
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;"

@ -1,6 +1,5 @@
service ssh start
sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml
hadoop namenode -format
start-all.sh
service mysql start
mysql -u root -e "CREATE USER \"test\"@\"localhost\" IDENTIFIED BY \"test\""
@ -9,4 +8,4 @@ schematool -initSchema -dbType mysql
#nohup hiveserver2 &
nohup hive --service metastore &
bash /prepare_hive_data.sh
while true; do sleep 1000; done
python3 http_api_server.py

@ -40,7 +40,7 @@ RUN apt-get update \
/tmp/* \
&& apt-get clean

ENV TZ=Europe/Moscow
ENV TZ=Etc/UTC
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

ENV DOCKER_CHANNEL stable

@ -19,58 +19,126 @@ import xml.etree.ElementTree as et
|
||||
from threading import Thread
|
||||
from scipy import stats
|
||||
|
||||
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING')
|
||||
logging.basicConfig(
|
||||
format="%(asctime)s: %(levelname)s: %(module)s: %(message)s", level="WARNING"
|
||||
)
|
||||
|
||||
total_start_seconds = time.perf_counter()
|
||||
stage_start_seconds = total_start_seconds
|
||||
|
||||
|
||||
def reportStageEnd(stage):
|
||||
global stage_start_seconds, total_start_seconds
|
||||
|
||||
current = time.perf_counter()
|
||||
print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}')
|
||||
print(
|
||||
f"stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}"
|
||||
)
|
||||
stage_start_seconds = current
|
||||
|
||||
|
||||
def tsv_escape(s):
|
||||
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
|
||||
return (
|
||||
s.replace("\\", "\\\\")
|
||||
.replace("\t", "\\t")
|
||||
.replace("\n", "\\n")
|
||||
.replace("\r", "")
|
||||
)
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description='Run performance test.')
|
||||
parser = argparse.ArgumentParser(description="Run performance test.")
|
||||
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
|
||||
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
|
||||
parser.add_argument('--host', nargs='*', default=['localhost'], help="Space-separated list of server hostname(s). Corresponds to '--port' options.")
|
||||
parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated list of server port(s). Corresponds to '--host' options.")
|
||||
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
|
||||
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
|
||||
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
|
||||
parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
|
||||
parser.add_argument('--prewarm-max-query-seconds', type=int, default=180, help='For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.')
|
||||
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
|
||||
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
|
||||
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
|
||||
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
|
||||
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.")
|
||||
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.")
|
||||
parser.add_argument(
|
||||
"file",
|
||||
metavar="FILE",
|
||||
type=argparse.FileType("r", encoding="utf-8"),
|
||||
nargs=1,
|
||||
help="test description file",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--host",
|
||||
nargs="*",
|
||||
default=["localhost"],
|
||||
help="Space-separated list of server hostname(s). Corresponds to '--port' options.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--port",
|
||||
nargs="*",
|
||||
default=[9000],
|
||||
help="Space-separated list of server port(s). Corresponds to '--host' options.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--runs", type=int, default=1, help="Number of query runs per server."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-queries",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Test no more than this number of queries, chosen at random.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--queries-to-run",
|
||||
nargs="*",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Space-separated list of indexes of queries to test.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-query-seconds",
|
||||
type=int,
|
||||
default=15,
|
||||
help="For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--prewarm-max-query-seconds",
|
||||
type=int,
|
||||
default=180,
|
||||
help="For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--profile-seconds",
|
||||
type=int,
|
||||
default=0,
|
||||
help="For how many seconds to profile a query for which the performance has changed.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--long", action="store_true", help="Do not skip the tests tagged as long."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--print-queries", action="store_true", help="Print test queries and exit."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--print-settings", action="store_true", help="Print test settings and exit."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--keep-created-tables",
|
||||
action="store_true",
|
||||
help="Don't drop the created tables after the test.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--use-existing-tables",
|
||||
action="store_true",
|
||||
help="Don't create or drop the tables, use the existing ones instead.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
reportStageEnd('start')
|
||||
reportStageEnd("start")
|
||||
|
||||
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
|
||||
|
||||
tree = et.parse(args.file[0])
|
||||
root = tree.getroot()
|
||||
|
||||
reportStageEnd('parse')
|
||||
reportStageEnd("parse")
|
||||
|
||||
# Process query parameters
|
||||
subst_elems = root.findall('substitutions/substitution')
|
||||
subst_elems = root.findall("substitutions/substitution")
|
||||
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
|
||||
for e in subst_elems:
|
||||
name = e.find('name').text
|
||||
values = [v.text for v in e.findall('values/value')]
|
||||
name = e.find("name").text
|
||||
values = [v.text for v in e.findall("values/value")]
|
||||
if not values:
|
||||
raise Exception(f'No values given for substitution {{{name}}}')
|
||||
raise Exception(f"No values given for substitution {{{name}}}")
|
||||
|
||||
available_parameters[name] = values
|
||||
|
||||
@ -78,7 +146,7 @@ for e in subst_elems:
|
||||
# parameters. The set of parameters is determined based on the first list.
|
||||
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS
|
||||
# followed by CREATE in create queries section, so the order matters.
|
||||
def substitute_parameters(query_templates, other_templates = []):
|
||||
def substitute_parameters(query_templates, other_templates=[]):
|
||||
query_results = []
|
||||
other_results = [[]] * (len(other_templates))
|
||||
for i, q in enumerate(query_templates):
|
||||
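The comment above is the contract of substitute_parameters: expand {name} placeholders using the values from the <substitutions> section while preserving query order. The loop body sits outside this hunk, so the following is only a simplified sketch of that expansion, not the actual implementation:

    import itertools
    import re

    # Simplified sketch only: expand "{param}" placeholders into every combination
    # of the declared values. The real function also expands other_templates
    # (for example the per-query "short" attribute) in lockstep.
    def expand(template, available_parameters):
        names = [n for n in re.findall(r"\{(\w+)\}", template) if n in available_parameters]
        if not names:
            return [template]
        combos = itertools.product(*(available_parameters[n] for n in names))
        return [template.format(**dict(zip(names, combo))) for combo in combos]

    print(expand("SELECT count() FROM {table}", {"table": ["hits_10m", "hits_100m"]}))
    # ['SELECT count() FROM hits_10m', 'SELECT count() FROM hits_100m']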
@ -103,17 +171,21 @@ def substitute_parameters(query_templates, other_templates = []):
|
||||
# and reporting the queries marked as short.
|
||||
test_queries = []
|
||||
is_short = []
|
||||
for e in root.findall('query'):
|
||||
new_queries, [new_is_short] = substitute_parameters([e.text], [[e.attrib.get('short', '0')]])
|
||||
for e in root.findall("query"):
|
||||
new_queries, [new_is_short] = substitute_parameters(
|
||||
[e.text], [[e.attrib.get("short", "0")]]
|
||||
)
|
||||
test_queries += new_queries
|
||||
is_short += [eval(s) for s in new_is_short]
|
||||
|
||||
assert(len(test_queries) == len(is_short))
|
||||
assert len(test_queries) == len(is_short)
|
||||
|
||||
# If we're given a list of queries to run, check that it makes sense.
|
||||
for i in args.queries_to_run or []:
|
||||
if i < 0 or i >= len(test_queries):
|
||||
print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present')
|
||||
print(
|
||||
f"There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present"
|
||||
)
|
||||
exit(1)
|
||||
|
||||
# If we're only asked to print the queries, do that and exit.
|
||||
@ -125,60 +197,65 @@ if args.print_queries:
|
||||
# Print short queries
|
||||
for i, s in enumerate(is_short):
|
||||
if s:
|
||||
print(f'short\t{i}')
|
||||
print(f"short\t{i}")
|
||||
|
||||
# If we're only asked to print the settings, do that and exit. These are settings
|
||||
# for clickhouse-benchmark, so we print them as command line arguments, e.g.
|
||||
# '--max_memory_usage=10000000'.
|
||||
if args.print_settings:
|
||||
for s in root.findall('settings/*'):
|
||||
print(f'--{s.tag}={s.text}')
|
||||
for s in root.findall("settings/*"):
|
||||
print(f"--{s.tag}={s.text}")
|
||||
|
||||
exit(0)
|
||||
|
||||
# Skip long tests
|
||||
if not args.long:
|
||||
for tag in root.findall('.//tag'):
|
||||
if tag.text == 'long':
|
||||
print('skipped\tTest is tagged as long.')
|
||||
for tag in root.findall(".//tag"):
|
||||
if tag.text == "long":
|
||||
print("skipped\tTest is tagged as long.")
|
||||
sys.exit(0)
|
||||
|
||||
# Print report threshold for the test if it is set.
|
||||
ignored_relative_change = 0.05
|
||||
if 'max_ignored_relative_change' in root.attrib:
|
||||
if "max_ignored_relative_change" in root.attrib:
|
||||
ignored_relative_change = float(root.attrib["max_ignored_relative_change"])
|
||||
print(f'report-threshold\t{ignored_relative_change}')
|
||||
print(f"report-threshold\t{ignored_relative_change}")
|
||||
|
||||
reportStageEnd('before-connect')
|
||||
reportStageEnd("before-connect")
|
||||
|
||||
# Open connections
|
||||
servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)]
|
||||
servers = [
|
||||
{"host": host or args.host[0], "port": port or args.port[0]}
|
||||
for (host, port) in itertools.zip_longest(args.host, args.port)
|
||||
]
|
||||
# Force settings_is_important to fail queries on unknown settings.
|
||||
all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers]
|
||||
all_connections = [
|
||||
clickhouse_driver.Client(**server, settings_is_important=True) for server in servers
|
||||
]
|
||||
|
||||
for i, s in enumerate(servers):
|
||||
print(f'server\t{i}\t{s["host"]}\t{s["port"]}')
|
||||
|
||||
reportStageEnd('connect')
|
||||
reportStageEnd("connect")
|
||||
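The reformatted servers list keeps the earlier pairing behaviour: --host and --port are zipped together and a missing value falls back to the first entry of the other list. A small worked example of that zip_longest pairing, with illustrative host names:

    import itertools

    hosts = ["left-server", "right-server"]
    ports = [9001]  # shorter list: the missing entry falls back to ports[0]

    servers = [
        {"host": host or hosts[0], "port": port or ports[0]}
        for (host, port) in itertools.zip_longest(hosts, ports)
    ]
    print(servers)
    # [{'host': 'left-server', 'port': 9001}, {'host': 'right-server', 'port': 9001}]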
|
||||
if not args.use_existing_tables:
|
||||
# Run drop queries, ignoring errors. Do this before all other activity,
|
||||
# because clickhouse_driver disconnects on error (this is not configurable),
|
||||
# and the new connection loses the changes in settings.
|
||||
drop_query_templates = [q.text for q in root.findall('drop_query')]
|
||||
drop_query_templates = [q.text for q in root.findall("drop_query")]
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
try:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
|
||||
except:
|
||||
pass
|
||||
|
||||
reportStageEnd('drop-1')
|
||||
reportStageEnd("drop-1")
|
||||
|
||||
# Apply settings.
|
||||
settings = root.findall('settings/*')
|
||||
settings = root.findall("settings/*")
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for s in settings:
|
||||
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
|
||||
@ -189,48 +266,52 @@ for conn_index, c in enumerate(all_connections):
|
||||
# the test, which is wrong.
|
||||
c.execute("select 1")
|
||||
|
||||
reportStageEnd('settings')
|
||||
reportStageEnd("settings")
|
||||
|
||||
# Check tables that should exist. If they don't exist, just skip this test.
|
||||
tables = [e.text for e in root.findall('preconditions/table_exists')]
|
||||
tables = [e.text for e in root.findall("preconditions/table_exists")]
|
||||
for t in tables:
|
||||
for c in all_connections:
|
||||
try:
|
||||
res = c.execute("select 1 from {} limit 1".format(t))
|
||||
except:
|
||||
exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
|
||||
skipped_message = ' '.join(exception_message.split('\n')[:2])
|
||||
print(f'skipped\t{tsv_escape(skipped_message)}')
|
||||
skipped_message = " ".join(exception_message.split("\n")[:2])
|
||||
print(f"skipped\t{tsv_escape(skipped_message)}")
|
||||
sys.exit(0)
|
||||
|
||||
reportStageEnd('preconditions')
|
||||
reportStageEnd("preconditions")
|
||||
|
||||
if not args.use_existing_tables:
|
||||
# Run create and fill queries. We will run them simultaneously for both
|
||||
# servers, to save time. The weird XML search + filter is because we want to
|
||||
# keep the relative order of elements, and etree doesn't support the
|
||||
# appropriate xpath query.
|
||||
create_query_templates = [q.text for q in root.findall('./*')
|
||||
if q.tag in ('create_query', 'fill_query')]
|
||||
create_query_templates = [
|
||||
q.text for q in root.findall("./*") if q.tag in ("create_query", "fill_query")
|
||||
]
|
||||
create_queries = substitute_parameters(create_query_templates)
|
||||
|
||||
# Disallow temporary tables, because the clickhouse_driver reconnects on
|
||||
# errors, and temporary tables are destroyed. We want to be able to continue
|
||||
# after some errors.
|
||||
for q in create_queries:
|
||||
if re.search('create temporary table', q, flags=re.IGNORECASE):
|
||||
print(f"Temporary tables are not allowed in performance tests: '{q}'",
|
||||
file = sys.stderr)
|
||||
if re.search("create temporary table", q, flags=re.IGNORECASE):
|
||||
print(
|
||||
f"Temporary tables are not allowed in performance tests: '{q}'",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
def do_create(connection, index, queries):
|
||||
for q in queries:
|
||||
connection.execute(q)
|
||||
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
print(f"create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}")
|
||||
|
||||
threads = [
|
||||
Thread(target = do_create, args = (connection, index, create_queries))
|
||||
for index, connection in enumerate(all_connections)]
|
||||
Thread(target=do_create, args=(connection, index, create_queries))
|
||||
for index, connection in enumerate(all_connections)
|
||||
]
|
||||
|
||||
for t in threads:
|
||||
t.start()
|
||||
@ -238,14 +319,16 @@ if not args.use_existing_tables:
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
reportStageEnd('create')
|
||||
reportStageEnd("create")
|
||||
|
||||
# By default, test all queries.
|
||||
queries_to_run = range(0, len(test_queries))
|
||||
|
||||
if args.max_queries:
|
||||
# If specified, test a limited number of queries chosen at random.
|
||||
queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries))
|
||||
queries_to_run = random.sample(
|
||||
range(0, len(test_queries)), min(len(test_queries), args.max_queries)
|
||||
)
|
||||
|
||||
if args.queries_to_run:
|
||||
# Run the specified queries.
|
||||
@ -255,16 +338,16 @@ if args.queries_to_run:
|
||||
profile_total_seconds = 0
|
||||
for query_index in queries_to_run:
|
||||
q = test_queries[query_index]
|
||||
query_prefix = f'{test_name}.query{query_index}'
|
||||
query_prefix = f"{test_name}.query{query_index}"
|
||||
|
||||
# We have some crazy long queries (about 100kB), so trim them to a sane
|
||||
# length. This means we can't use query text as an identifier and have to
|
||||
# use the test name + the test-wide query index.
|
||||
query_display_name = q
|
||||
if len(query_display_name) > 1000:
|
||||
query_display_name = f'{query_display_name[:1000]}...({query_index})'
|
||||
query_display_name = f"{query_display_name[:1000]}...({query_index})"
|
||||
|
||||
print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')
|
||||
print(f"display-name\t{query_index}\t{tsv_escape(query_display_name)}")
|
||||
|
||||
# Prewarm: run once on both servers. Helps to bring the data into memory,
|
||||
# precompile the queries, etc.
|
||||
@ -272,10 +355,10 @@ for query_index in queries_to_run:
|
||||
# new one. We want to run them on the new server only, so that the PR author
|
||||
# can ensure that the test works properly. Remember the errors we had on
|
||||
# each server.
|
||||
query_error_on_connection = [None] * len(all_connections);
|
||||
query_error_on_connection = [None] * len(all_connections)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
try:
|
||||
prewarm_id = f'{query_prefix}.prewarm0'
|
||||
prewarm_id = f"{query_prefix}.prewarm0"
|
||||
|
||||
try:
|
||||
# During the warmup runs, we will also:
|
||||
@ -283,25 +366,30 @@ for query_index in queries_to_run:
|
||||
# * collect profiler traces, which might be helpful for analyzing
|
||||
# test coverage. We disable profiler for normal runs because
|
||||
# it makes the results unstable.
|
||||
res = c.execute(q, query_id = prewarm_id,
|
||||
settings = {
|
||||
'max_execution_time': args.prewarm_max_query_seconds,
|
||||
'query_profiler_real_time_period_ns': 10000000,
|
||||
'memory_profiler_step': '4Mi',
|
||||
})
|
||||
res = c.execute(
|
||||
q,
|
||||
query_id=prewarm_id,
|
||||
settings={
|
||||
"max_execution_time": args.prewarm_max_query_seconds,
|
||||
"query_profiler_real_time_period_ns": 10000000,
|
||||
"memory_profiler_step": "4Mi",
|
||||
},
|
||||
)
|
||||
except clickhouse_driver.errors.Error as e:
|
||||
# Add query id to the exception to make debugging easier.
|
||||
e.args = (prewarm_id, *e.args)
|
||||
e.message = prewarm_id + ': ' + e.message
|
||||
e.message = prewarm_id + ": " + e.message
|
||||
raise
|
||||
|
||||
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
|
||||
print(
|
||||
f"prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}"
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except:
|
||||
# FIXME the driver reconnects on error and we lose settings, so this
|
||||
# might lead to further errors or unexpected behavior.
|
||||
query_error_on_connection[conn_index] = traceback.format_exc();
|
||||
query_error_on_connection[conn_index] = traceback.format_exc()
|
||||
continue
|
||||
|
||||
# Report all errors that occurred during prewarm and decide what to do next.
|
||||
@ -311,14 +399,14 @@ for query_index in queries_to_run:
|
||||
no_errors = []
|
||||
for i, e in enumerate(query_error_on_connection):
|
||||
if e:
|
||||
print(e, file = sys.stderr)
|
||||
print(e, file=sys.stderr)
|
||||
else:
|
||||
no_errors.append(i)
|
||||
|
||||
if len(no_errors) == 0:
|
||||
continue
|
||||
elif len(no_errors) < len(all_connections):
|
||||
print(f'partial\t{query_index}\t{no_errors}')
|
||||
print(f"partial\t{query_index}\t{no_errors}")
|
||||
|
||||
this_query_connections = [all_connections[index] for index in no_errors]
|
||||
|
||||
@ -337,27 +425,34 @@ for query_index in queries_to_run:
|
||||
all_server_times.append([])
|
||||
|
||||
while True:
|
||||
run_id = f'{query_prefix}.run{run}'
|
||||
run_id = f"{query_prefix}.run{run}"
|
||||
|
||||
for conn_index, c in enumerate(this_query_connections):
|
||||
try:
|
||||
res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds})
|
||||
res = c.execute(
|
||||
q,
|
||||
query_id=run_id,
|
||||
settings={"max_execution_time": args.max_query_seconds},
|
||||
)
|
||||
except clickhouse_driver.errors.Error as e:
|
||||
# Add query id to the exception to make debugging easier.
|
||||
e.args = (run_id, *e.args)
|
||||
e.message = run_id + ': ' + e.message
|
||||
e.message = run_id + ": " + e.message
|
||||
raise
|
||||
|
||||
elapsed = c.last_query.elapsed
|
||||
all_server_times[conn_index].append(elapsed)
|
||||
|
||||
server_seconds += elapsed
|
||||
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')
|
||||
print(f"query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}")
|
||||
|
||||
if elapsed > args.max_query_seconds:
|
||||
# Do not stop processing pathologically slow queries,
|
||||
# since this may hide errors in other queries.
|
||||
print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr)
|
||||
print(
|
||||
f"The query no. {query_index} is taking too long to run ({elapsed} s)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
# Be careful with the counter, after this line it's the next iteration
|
||||
# already.
|
||||
@ -386,7 +481,7 @@ for query_index in queries_to_run:
|
||||
break
|
||||
|
||||
client_seconds = time.perf_counter() - start_seconds
|
||||
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
|
||||
print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}")
|
||||
|
||||
# Run additional profiling queries to collect profile data, but only if test times appeared to be different.
|
||||
# We have to do it after normal runs because otherwise it will affect test statistics too much
|
||||
@ -397,13 +492,15 @@ for query_index in queries_to_run:
|
||||
# Don't fail if for some reason there are not enough measurements.
|
||||
continue
|
||||
|
||||
pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue
|
||||
pvalue = stats.ttest_ind(
|
||||
all_server_times[0], all_server_times[1], equal_var=False
|
||||
).pvalue
|
||||
median = [statistics.median(t) for t in all_server_times]
|
||||
# Keep this consistent with the value used in report. Should eventually move
|
||||
# to (median[1] - median[0]) / min(median), which is compatible with "times"
|
||||
# difference we use in report (max(median) / min(median)).
|
||||
relative_diff = (median[1] - median[0]) / median[0]
|
||||
print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}')
|
||||
print(f"diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}")
|
||||
if abs(relative_diff) < ignored_relative_change or pvalue > 0.05:
|
||||
continue
|
||||
|
||||
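The block above decides whether a query deserves extra profiling: Welch's t-test (equal_var=False) combined with the relative difference of the medians, and profiling is skipped when the change is below max_ignored_relative_change or the p-value exceeds 0.05. A worked example with made-up timings, using the same scipy and statistics calls:

    import statistics
    from scipy import stats

    old_times = [0.101, 0.099, 0.102, 0.100, 0.103]
    new_times = [0.121, 0.119, 0.122, 0.120, 0.118]  # roughly 20% slower, low variance

    pvalue = stats.ttest_ind(old_times, new_times, equal_var=False).pvalue
    median = [statistics.median(old_times), statistics.median(new_times)]
    relative_diff = (median[1] - median[0]) / median[0]

    print(f"diff\t{median[0]}\t{median[1]}\t{relative_diff:.3f}\t{pvalue:.2g}")
    # relative_diff is about 0.19 and pvalue is far below 0.05, so this query would
    # be profiled (the default max_ignored_relative_change above is 0.05).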
@ -412,25 +509,31 @@ for query_index in queries_to_run:
|
||||
profile_start_seconds = time.perf_counter()
|
||||
run = 0
|
||||
while time.perf_counter() - profile_start_seconds < args.profile_seconds:
|
||||
run_id = f'{query_prefix}.profile{run}'
|
||||
run_id = f"{query_prefix}.profile{run}"
|
||||
|
||||
for conn_index, c in enumerate(this_query_connections):
|
||||
try:
|
||||
res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000})
|
||||
print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
|
||||
res = c.execute(
|
||||
q,
|
||||
query_id=run_id,
|
||||
settings={"query_profiler_real_time_period_ns": 10000000},
|
||||
)
|
||||
print(
|
||||
f"profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}"
|
||||
)
|
||||
except clickhouse_driver.errors.Error as e:
|
||||
# Add query id to the exception to make debugging easier.
|
||||
e.args = (run_id, *e.args)
|
||||
e.message = run_id + ': ' + e.message
|
||||
e.message = run_id + ": " + e.message
|
||||
raise
|
||||
|
||||
run += 1
|
||||
|
||||
profile_total_seconds += time.perf_counter() - profile_start_seconds
|
||||
|
||||
print(f'profile-total\t{profile_total_seconds}')
|
||||
print(f"profile-total\t{profile_total_seconds}")
|
||||
|
||||
reportStageEnd('run')
|
||||
reportStageEnd("run")
|
||||
|
||||
# Run drop queries
|
||||
if not args.keep_created_tables and not args.use_existing_tables:
|
||||
@ -438,6 +541,6 @@ if not args.keep_created_tables and not args.use_existing_tables:
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
|
||||
|
||||
reportStageEnd('drop-2')
|
||||
reportStageEnd("drop-2")
|
||||
|
@ -12,9 +12,13 @@ import pprint
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
parser = argparse.ArgumentParser(description='Create performance test report')
|
||||
parser.add_argument('--report', default='main', choices=['main', 'all-queries'],
|
||||
help='Which report to build')
|
||||
parser = argparse.ArgumentParser(description="Create performance test report")
|
||||
parser.add_argument(
|
||||
"--report",
|
||||
default="main",
|
||||
choices=["main", "all-queries"],
|
||||
help="Which report to build",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
tables = []
|
||||
@ -31,8 +35,8 @@ unstable_partial_queries = 0
|
||||
# max seconds to run one query by itself, not counting preparation
|
||||
allowed_single_run_time = 2
|
||||
|
||||
color_bad='#ffb0c0'
|
||||
color_good='#b0d050'
|
||||
color_bad = "#ffb0c0"
|
||||
color_good = "#b0d050"
|
||||
|
||||
header_template = """
|
||||
<!DOCTYPE html>
|
||||
@ -151,24 +155,29 @@ tr:nth-child(odd) td {{filter: brightness(90%);}}
|
||||
table_anchor = 0
|
||||
row_anchor = 0
|
||||
|
||||
|
||||
def currentTableAnchor():
|
||||
global table_anchor
|
||||
return f'{table_anchor}'
|
||||
return f"{table_anchor}"
|
||||
|
||||
|
||||
def newTableAnchor():
|
||||
global table_anchor
|
||||
table_anchor += 1
|
||||
return currentTableAnchor()
|
||||
|
||||
|
||||
def currentRowAnchor():
|
||||
global row_anchor
|
||||
global table_anchor
|
||||
return f'{table_anchor}.{row_anchor}'
|
||||
return f"{table_anchor}.{row_anchor}"
|
||||
|
||||
|
||||
def nextRowAnchor():
|
||||
global row_anchor
|
||||
global table_anchor
|
||||
return f'{table_anchor}.{row_anchor + 1}'
|
||||
return f"{table_anchor}.{row_anchor + 1}"
|
||||
|
||||
|
||||
def advanceRowAnchor():
|
||||
global row_anchor
|
||||
@ -178,43 +187,58 @@ def advanceRowAnchor():
|
||||
|
||||
|
||||
def tr(x, anchor=None):
|
||||
#return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
|
||||
# return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
|
||||
anchor = anchor if anchor else advanceRowAnchor()
|
||||
return f'<tr id={anchor}>{x}</tr>'
|
||||
return f"<tr id={anchor}>{x}</tr>"
|
||||
|
||||
def td(value, cell_attributes = ''):
|
||||
return '<td {cell_attributes}>{value}</td>'.format(
|
||||
cell_attributes = cell_attributes,
|
||||
value = value)
|
||||
|
||||
def th(value, cell_attributes = ''):
|
||||
return '<th {cell_attributes}>{value}</th>'.format(
|
||||
cell_attributes = cell_attributes,
|
||||
value = value)
|
||||
def td(value, cell_attributes=""):
|
||||
return "<td {cell_attributes}>{value}</td>".format(
|
||||
cell_attributes=cell_attributes, value=value
|
||||
)
|
||||
|
||||
def tableRow(cell_values, cell_attributes = [], anchor=None):
|
||||
|
||||
def th(value, cell_attributes=""):
|
||||
return "<th {cell_attributes}>{value}</th>".format(
|
||||
cell_attributes=cell_attributes, value=value
|
||||
)
|
||||
|
||||
|
||||
def tableRow(cell_values, cell_attributes=[], anchor=None):
|
||||
return tr(
|
||||
''.join([td(v, a)
|
||||
"".join(
|
||||
[
|
||||
td(v, a)
|
||||
for v, a in itertools.zip_longest(
|
||||
cell_values, cell_attributes,
|
||||
fillvalue = '')
|
||||
if a is not None and v is not None]),
|
||||
anchor)
|
||||
cell_values, cell_attributes, fillvalue=""
|
||||
)
|
||||
if a is not None and v is not None
|
||||
]
|
||||
),
|
||||
anchor,
|
||||
)
|
||||
|
||||
def tableHeader(cell_values, cell_attributes = []):
|
||||
|
||||
def tableHeader(cell_values, cell_attributes=[]):
|
||||
return tr(
|
||||
''.join([th(v, a)
|
||||
"".join(
|
||||
[
|
||||
th(v, a)
|
||||
for v, a in itertools.zip_longest(
|
||||
cell_values, cell_attributes,
|
||||
fillvalue = '')
|
||||
if a is not None and v is not None]))
|
||||
cell_values, cell_attributes, fillvalue=""
|
||||
)
|
||||
if a is not None and v is not None
|
||||
]
|
||||
)
|
||||
)
|
||||
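tableRow and tableHeader above rely on a quiet convention: a column whose attribute is None is dropped from the generated HTML (that is how the hidden "Failed" and "Runs" columns disappear), while an empty-string attribute keeps the cell. A small self-contained illustration mirroring those helpers:

    import itertools

    # Minimal stand-ins mirroring td() and tableRow() above.
    def td(value, cell_attributes=""):
        return "<td {cell_attributes}>{value}</td>".format(
            cell_attributes=cell_attributes, value=value
        )

    def table_row(cell_values, cell_attributes=[]):
        cells = "".join(
            td(v, a)
            for v, a in itertools.zip_longest(cell_values, cell_attributes, fillvalue="")
            if a is not None and v is not None
        )
        return "<tr>" + cells + "</tr>"

    # The third column carries a None attribute, so it is omitted from the HTML.
    print(table_row(["0.12", "0.15", "1"], ["", "", None]))
    # <tr><td >0.12</td><td >0.15</td></tr>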
|
||||
|
||||
def tableStart(title):
|
||||
cls = '-'.join(title.lower().split(' ')[:3]);
|
||||
cls = "-".join(title.lower().split(" ")[:3])
|
||||
global table_anchor
|
||||
table_anchor = cls
|
||||
anchor = currentTableAnchor()
|
||||
help_anchor = '-'.join(title.lower().split(' '));
|
||||
help_anchor = "-".join(title.lower().split(" "))
|
||||
return f"""
|
||||
<h2 id="{anchor}">
|
||||
<a class="cancela" href="#{anchor}">{title}</a>
|
||||
@ -223,12 +247,14 @@ def tableStart(title):
|
||||
<table class="{cls}">
|
||||
"""
|
||||
|
||||
|
||||
def tableEnd():
|
||||
return '</table>'
|
||||
return "</table>"
|
||||
|
||||
|
||||
def tsvRows(n):
|
||||
try:
|
||||
with open(n, encoding='utf-8') as fd:
|
||||
with open(n, encoding="utf-8") as fd:
|
||||
result = []
|
||||
for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE):
|
||||
new_row = []
|
||||
@ -237,27 +263,32 @@ def tsvRows(n):
|
||||
# The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so
|
||||
# 'Чем зÐ<C2B7>нимаеÑ<C2B5>ЬÑ<C2AC>Ñ<EFBFBD>' is transformed back into 'Чем зАнимаешЬся'.
|
||||
|
||||
new_row.append(e.encode('utf-8').decode('unicode-escape').encode('latin1').decode('utf-8'))
|
||||
new_row.append(
|
||||
e.encode("utf-8")
|
||||
.decode("unicode-escape")
|
||||
.encode("latin1")
|
||||
.decode("utf-8")
|
||||
)
|
||||
result.append(new_row)
|
||||
return result
|
||||
|
||||
except:
|
||||
report_errors.append(
|
||||
traceback.format_exception_only(
|
||||
*sys.exc_info()[:2])[-1])
|
||||
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
|
||||
pass
|
||||
return []
|
||||
|
||||
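The chained decode above reverses backslash-escaped UTF-8 inside the TSV cells: decode("unicode-escape") turns the \xNN sequences into Latin-1 code points, and the latin1/utf-8 round trip reinterprets those code points as UTF-8 bytes. A worked example on an illustrative escaped string:

    # Illustrative input: UTF-8 bytes written out as backslash escapes inside a TSV cell.
    cell = "SELECT '\\xd0\\xbf\\xd1\\x80\\xd0\\xb8\\xd0\\xb2\\xd0\\xb5\\xd1\\x82'"

    step1 = cell.encode("utf-8").decode("unicode-escape")  # \xNN escapes -> Latin-1 mojibake
    step2 = step1.encode("latin1").decode("utf-8")         # reinterpret those bytes as UTF-8
    print(step2)  # SELECT 'привет'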
|
||||
def htmlRows(n):
|
||||
rawRows = tsvRows(n)
|
||||
result = ''
|
||||
result = ""
|
||||
for row in rawRows:
|
||||
result += tableRow(row)
|
||||
return result
|
||||
|
||||
|
||||
def addSimpleTable(caption, columns, rows, pos=None):
|
||||
global tables
|
||||
text = ''
|
||||
text = ""
|
||||
if not rows:
|
||||
return
|
||||
|
||||
@ -268,51 +299,63 @@ def addSimpleTable(caption, columns, rows, pos=None):
|
||||
text += tableEnd()
|
||||
tables.insert(pos if pos else len(tables), text)
|
||||
|
||||
|
||||
def add_tested_commits():
|
||||
global report_errors
|
||||
try:
|
||||
addSimpleTable('Tested Commits', ['Old', 'New'],
|
||||
[['<pre>{}</pre>'.format(x) for x in
|
||||
[open('left-commit.txt').read(),
|
||||
open('right-commit.txt').read()]]])
|
||||
addSimpleTable(
|
||||
"Tested Commits",
|
||||
["Old", "New"],
|
||||
[
|
||||
[
|
||||
"<pre>{}</pre>".format(x)
|
||||
for x in [
|
||||
open("left-commit.txt").read(),
|
||||
open("right-commit.txt").read(),
|
||||
]
|
||||
]
|
||||
],
|
||||
)
|
||||
except:
|
||||
# Don't fail if no commit info -- maybe it's a manual run.
|
||||
report_errors.append(
|
||||
traceback.format_exception_only(
|
||||
*sys.exc_info()[:2])[-1])
|
||||
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
|
||||
pass
|
||||
|
||||
|
||||
def add_report_errors():
|
||||
global tables
|
||||
global report_errors
|
||||
# Add the errors reported by various steps of comparison script
|
||||
try:
|
||||
report_errors += [l.strip() for l in open('report/errors.log')]
|
||||
report_errors += [l.strip() for l in open("report/errors.log")]
|
||||
except:
|
||||
report_errors.append(
|
||||
traceback.format_exception_only(
|
||||
*sys.exc_info()[:2])[-1])
|
||||
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
|
||||
pass
|
||||
|
||||
if not report_errors:
|
||||
return
|
||||
|
||||
text = tableStart('Errors while Building the Report')
|
||||
text += tableHeader(['Error'])
|
||||
text = tableStart("Errors while Building the Report")
|
||||
text += tableHeader(["Error"])
|
||||
for x in report_errors:
|
||||
text += tableRow([x])
|
||||
text += tableEnd()
|
||||
# Insert after Tested Commits
|
||||
tables.insert(1, text)
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>'
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def add_errors_explained():
|
||||
if not errors_explained:
|
||||
return
|
||||
|
||||
text = '<a name="fail1"/>'
|
||||
text += tableStart('Error Summary')
|
||||
text += tableHeader(['Description'])
|
||||
text += tableStart("Error Summary")
|
||||
text += tableHeader(["Description"])
|
||||
for row in errors_explained:
|
||||
text += tableRow(row)
|
||||
text += tableEnd()
|
||||
@ -321,59 +364,81 @@ def add_errors_explained():
|
||||
tables.insert(1, text)
|
||||
|
||||
|
||||
if args.report == 'main':
|
||||
if args.report == "main":
|
||||
print((header_template.format()))
|
||||
|
||||
add_tested_commits()
|
||||
|
||||
|
||||
run_error_rows = tsvRows('run-errors.tsv')
|
||||
run_error_rows = tsvRows("run-errors.tsv")
|
||||
error_tests += len(run_error_rows)
|
||||
addSimpleTable('Run Errors', ['Test', 'Error'], run_error_rows)
|
||||
addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows)
|
||||
if run_error_rows:
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>'
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
|
||||
slow_on_client_rows = tsvRows("report/slow-on-client.tsv")
|
||||
error_tests += len(slow_on_client_rows)
|
||||
addSimpleTable('Slow on Client',
|
||||
['Client time, s', 'Server time, s', 'Ratio', 'Test', 'Query'],
|
||||
slow_on_client_rows)
|
||||
addSimpleTable(
|
||||
"Slow on Client",
|
||||
["Client time, s", "Server time, s", "Ratio", "Test", "Query"],
|
||||
slow_on_client_rows,
|
||||
)
|
||||
if slow_on_client_rows:
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>'
|
||||
]
|
||||
)
|
||||
|
||||
unmarked_short_rows = tsvRows('report/unexpected-query-duration.tsv')
|
||||
unmarked_short_rows = tsvRows("report/unexpected-query-duration.tsv")
|
||||
error_tests += len(unmarked_short_rows)
|
||||
addSimpleTable('Unexpected Query Duration',
|
||||
['Problem', 'Marked as "short"?', 'Run time, s', 'Test', '#', 'Query'],
|
||||
unmarked_short_rows)
|
||||
addSimpleTable(
|
||||
"Unexpected Query Duration",
|
||||
["Problem", 'Marked as "short"?', "Run time, s", "Test", "#", "Query"],
|
||||
unmarked_short_rows,
|
||||
)
|
||||
if unmarked_short_rows:
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>'
|
||||
]
|
||||
)
|
||||
|
||||
def add_partial():
|
||||
rows = tsvRows('report/partial-queries-report.tsv')
|
||||
rows = tsvRows("report/partial-queries-report.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
global unstable_partial_queries, slow_average_tests, tables
|
||||
text = tableStart('Partial Queries')
|
||||
columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query']
|
||||
text = tableStart("Partial Queries")
|
||||
columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"]
|
||||
text += tableHeader(columns)
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
for row in rows:
|
||||
anchor = f'{currentTableAnchor()}.{row[2]}.{row[3]}'
|
||||
anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}"
|
||||
if float(row[1]) > 0.10:
|
||||
attrs[1] = f'style="background: {color_bad}"'
|
||||
unstable_partial_queries += 1
|
||||
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' has excessive variance of run time. Keep it below 10%</a>'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"#{anchor}\">The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%</a>"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[1] = ''
|
||||
attrs[1] = ""
|
||||
if float(row[0]) > allowed_single_run_time:
|
||||
attrs[0] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'
|
||||
]
|
||||
)
|
||||
slow_average_tests += 1
|
||||
else:
|
||||
attrs[0] = ''
|
||||
attrs[0] = ""
|
||||
text += tableRow(row, attrs, anchor)
|
||||
text += tableEnd()
|
||||
tables.append(text)
|
||||
@ -381,41 +446,45 @@ if args.report == 'main':
|
||||
add_partial()
|
||||
|
||||
def add_changes():
|
||||
rows = tsvRows('report/changed-perf.tsv')
|
||||
rows = tsvRows("report/changed-perf.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
global faster_queries, slower_queries, tables
|
||||
|
||||
text = tableStart('Changes in Performance')
|
||||
text = tableStart("Changes in Performance")
|
||||
columns = [
|
||||
'Old, s', # 0
|
||||
'New, s', # 1
|
||||
'Ratio of speedup (-) or slowdown (+)', # 2
|
||||
'Relative difference (new − old) / old', # 3
|
||||
'p < 0.01 threshold', # 4
|
||||
'', # Failed # 5
|
||||
'Test', # 6
|
||||
'#', # 7
|
||||
'Query', # 8
|
||||
"Old, s", # 0
|
||||
"New, s", # 1
|
||||
"Ratio of speedup (-) or slowdown (+)", # 2
|
||||
"Relative difference (new − old) / old", # 3
|
||||
"p < 0.01 threshold", # 4
|
||||
"", # Failed # 5
|
||||
"Test", # 6
|
||||
"#", # 7
|
||||
"Query", # 8
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[5] = None
|
||||
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
for row in rows:
|
||||
anchor = f'{currentTableAnchor()}.{row[6]}.{row[7]}'
|
||||
anchor = f"{currentTableAnchor()}.{row[6]}.{row[7]}"
|
||||
if int(row[5]):
|
||||
if float(row[3]) < 0.:
|
||||
if float(row[3]) < 0.0:
|
||||
faster_queries += 1
|
||||
attrs[2] = attrs[3] = f'style="background: {color_good}"'
|
||||
else:
|
||||
slower_queries += 1
|
||||
attrs[2] = attrs[3] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="#{anchor}">The query no. {row[7]} of test \'{row[6]}\' has slowed down</a>'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"#{anchor}\">The query no. {row[7]} of test '{row[6]}' has slowed down</a>"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[2] = attrs[3] = ''
|
||||
attrs[2] = attrs[3] = ""
|
||||
|
||||
text += tableRow(row, attrs, anchor)
|
||||
|
||||
@ -427,35 +496,35 @@ if args.report == 'main':
|
||||
def add_unstable_queries():
|
||||
global unstable_queries, very_unstable_queries, tables
|
||||
|
||||
unstable_rows = tsvRows('report/unstable-queries.tsv')
|
||||
unstable_rows = tsvRows("report/unstable-queries.tsv")
|
||||
if not unstable_rows:
|
||||
return
|
||||
|
||||
unstable_queries += len(unstable_rows)
|
||||
|
||||
columns = [
|
||||
'Old, s', #0
|
||||
'New, s', #1
|
||||
'Relative difference (new - old)/old', #2
|
||||
'p < 0.01 threshold', #3
|
||||
'', # Failed #4
|
||||
'Test', #5
|
||||
'#', #6
|
||||
'Query' #7
|
||||
"Old, s", # 0
|
||||
"New, s", # 1
|
||||
"Relative difference (new - old)/old", # 2
|
||||
"p < 0.01 threshold", # 3
|
||||
"", # Failed #4
|
||||
"Test", # 5
|
||||
"#", # 6
|
||||
"Query", # 7
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[4] = None
|
||||
|
||||
text = tableStart('Unstable Queries')
|
||||
text = tableStart("Unstable Queries")
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
for r in unstable_rows:
|
||||
anchor = f'{currentTableAnchor()}.{r[5]}.{r[6]}'
|
||||
anchor = f"{currentTableAnchor()}.{r[5]}.{r[6]}"
|
||||
if int(r[4]):
|
||||
very_unstable_queries += 1
|
||||
attrs[3] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[3] = ''
|
||||
attrs[3] = ""
|
||||
# Just don't add the slightly unstable queries we don't consider
|
||||
# errors. It's not clear what the user should do with them.
|
||||
continue
|
||||
@ -470,53 +539,70 @@ if args.report == 'main':
|
||||
|
||||
add_unstable_queries()
|
||||
|
||||
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
|
||||
addSimpleTable('Skipped Tests', ['Test', 'Reason'], skipped_tests_rows)
|
||||
skipped_tests_rows = tsvRows("analyze/skipped-tests.tsv")
|
||||
addSimpleTable("Skipped Tests", ["Test", "Reason"], skipped_tests_rows)
|
||||
|
||||
addSimpleTable('Test Performance Changes',
|
||||
['Test', 'Ratio of speedup (-) or slowdown (+)', 'Queries', 'Total not OK', 'Changed perf', 'Unstable'],
|
||||
tsvRows('report/test-perf-changes.tsv'))
|
||||
addSimpleTable(
|
||||
"Test Performance Changes",
|
||||
[
|
||||
"Test",
|
||||
"Ratio of speedup (-) or slowdown (+)",
|
||||
"Queries",
|
||||
"Total not OK",
|
||||
"Changed perf",
|
||||
"Unstable",
|
||||
],
|
||||
tsvRows("report/test-perf-changes.tsv"),
|
||||
)
|
||||
|
||||
def add_test_times():
|
||||
global slow_average_tests, tables
|
||||
rows = tsvRows('report/test-times.tsv')
|
||||
rows = tsvRows("report/test-times.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = [
|
||||
'Test', #0
|
||||
'Wall clock time, entire test, s', #1
|
||||
'Total client time for measured query runs, s', #2
|
||||
'Queries', #3
|
||||
'Longest query, total for measured runs, s', #4
|
||||
'Wall clock time per query, s', #5
|
||||
'Shortest query, total for measured runs, s', #6
|
||||
'', # Runs #7
|
||||
"Test", # 0
|
||||
"Wall clock time, entire test, s", # 1
|
||||
"Total client time for measured query runs, s", # 2
|
||||
"Queries", # 3
|
||||
"Longest query, total for measured runs, s", # 4
|
||||
"Wall clock time per query, s", # 5
|
||||
"Shortest query, total for measured runs, s", # 6
|
||||
"", # Runs #7
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[7] = None
|
||||
|
||||
text = tableStart('Test Times')
|
||||
text = tableStart("Test Times")
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
|
||||
for r in rows:
|
||||
anchor = f'{currentTableAnchor()}.{r[0]}'
|
||||
anchor = f"{currentTableAnchor()}.{r[0]}"
|
||||
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
|
||||
if r[0] != 'Total' and float(r[5]) > allowed_average_run_time * total_runs:
|
||||
if r[0] != "Total" and float(r[5]) > allowed_average_run_time * total_runs:
|
||||
# FIXME should be 15s max -- investigate parallel_insert
|
||||
slow_average_tests += 1
|
||||
attrs[5] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="#{anchor}">The test \'{r[0]}\' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"#{anchor}\">The test '{r[0]}' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[5] = ''
|
||||
attrs[5] = ""
|
||||
|
||||
if r[0] != 'Total' and float(r[4]) > allowed_single_run_time * total_runs:
|
||||
if r[0] != "Total" and float(r[4]) > allowed_single_run_time * total_runs:
|
||||
slow_average_tests += 1
|
||||
attrs[4] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="./all-queries.html#all-query-times.{r[0]}.0">Some query of the test \'{r[0]}\' is too slow to run. See the all queries report'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"./all-queries.html#all-query-times.{r[0]}.0\">Some query of the test '{r[0]}' is too slow to run. See the all queries report"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[4] = ''
|
||||
attrs[4] = ""
|
||||
|
||||
text += tableRow(r, attrs, anchor)
|
||||
|
||||
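The test-time budget above is plain arithmetic: each query runs once for prewarm plus r[7] measured runs, on both the old and the new server, so total_runs = (runs + 1) * 2, and with 7 runs the allowed wall-clock time per query (column 5) comes out to 3.75 * 16 = 60 seconds. A quick check of that arithmetic:

    allowed_average_run_time = 3.75   # seconds per run, as defined above
    runs = 7                          # measured runs per server (report column 7)

    total_runs = (runs + 1) * 2       # one prewarm run, two servers
    print(total_runs)                             # 16
    print(allowed_average_run_time * total_runs)  # 60.0 seconds per query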
@ -525,10 +611,17 @@ if args.report == 'main':
|
||||
|
||||
add_test_times()
|
||||
|
||||
addSimpleTable('Metric Changes',
|
||||
['Metric', 'Old median value', 'New median value',
|
||||
'Relative difference', 'Times difference'],
|
||||
tsvRows('metrics/changes.tsv'))
|
||||
addSimpleTable(
|
||||
"Metric Changes",
|
||||
[
|
||||
"Metric",
|
||||
"Old median value",
|
||||
"New median value",
|
||||
"Relative difference",
|
||||
"Times difference",
|
||||
],
|
||||
tsvRows("metrics/changes.tsv"),
|
||||
)
|
||||
|
||||
add_report_errors()
|
||||
add_errors_explained()
|
||||
@ -536,7 +629,8 @@ if args.report == 'main':
|
||||
for t in tables:
|
||||
print(t)
|
||||
|
||||
print(f"""
|
||||
print(
|
||||
f"""
|
||||
</div>
|
||||
<p class="links">
|
||||
<a href="all-queries.html">All queries</a>
|
||||
@ -546,104 +640,111 @@ if args.report == 'main':
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
status = 'success'
|
||||
message = 'See the report'
|
||||
status = "success"
|
||||
message = "See the report"
|
||||
message_array = []
|
||||
|
||||
if slow_average_tests:
|
||||
status = 'failure'
|
||||
message_array.append(str(slow_average_tests) + ' too long')
|
||||
status = "failure"
|
||||
message_array.append(str(slow_average_tests) + " too long")
|
||||
|
||||
if faster_queries:
|
||||
message_array.append(str(faster_queries) + ' faster')
|
||||
message_array.append(str(faster_queries) + " faster")
|
||||
|
||||
if slower_queries:
|
||||
if slower_queries > 3:
|
||||
status = 'failure'
|
||||
message_array.append(str(slower_queries) + ' slower')
|
||||
status = "failure"
|
||||
message_array.append(str(slower_queries) + " slower")
|
||||
|
||||
if unstable_partial_queries:
|
||||
very_unstable_queries += unstable_partial_queries
|
||||
status = 'failure'
|
||||
status = "failure"
|
||||
|
||||
# Don't show mildly unstable queries, only the very unstable ones we
|
||||
# treat as errors.
|
||||
if very_unstable_queries:
|
||||
if very_unstable_queries > 5:
|
||||
error_tests += very_unstable_queries
|
||||
status = 'failure'
|
||||
message_array.append(str(very_unstable_queries) + ' unstable')
|
||||
status = "failure"
|
||||
message_array.append(str(very_unstable_queries) + " unstable")
|
||||
|
||||
error_tests += slow_average_tests
|
||||
if error_tests:
|
||||
status = 'failure'
|
||||
message_array.insert(0, str(error_tests) + ' errors')
|
||||
status = "failure"
|
||||
message_array.insert(0, str(error_tests) + " errors")
|
||||
|
||||
if message_array:
|
||||
message = ', '.join(message_array)
|
||||
message = ", ".join(message_array)
|
||||
|
||||
if report_errors:
|
||||
status = 'failure'
|
||||
message = 'Errors while building the report.'
|
||||
status = "failure"
|
||||
message = "Errors while building the report."
|
||||
|
||||
print(("""
|
||||
print(
|
||||
(
|
||||
"""
|
||||
<!--status: {status}-->
|
||||
<!--message: {message}-->
|
||||
""".format(status=status, message=message)))
|
||||
""".format(
|
||||
status=status, message=message
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
elif args.report == 'all-queries':
|
||||
elif args.report == "all-queries":
|
||||
|
||||
print((header_template.format()))
|
||||
|
||||
add_tested_commits()
|
||||
|
||||
def add_all_queries():
|
||||
rows = tsvRows('report/all-queries.tsv')
|
||||
rows = tsvRows("report/all-queries.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = [
|
||||
'', # Changed #0
|
||||
'', # Unstable #1
|
||||
'Old, s', #2
|
||||
'New, s', #3
|
||||
'Ratio of speedup (-) or slowdown (+)', #4
|
||||
'Relative difference (new − old) / old', #5
|
||||
'p < 0.01 threshold', #6
|
||||
'Test', #7
|
||||
'#', #8
|
||||
'Query', #9
|
||||
"", # Changed #0
|
||||
"", # Unstable #1
|
||||
"Old, s", # 2
|
||||
"New, s", # 3
|
||||
"Ratio of speedup (-) or slowdown (+)", # 4
|
||||
"Relative difference (new − old) / old", # 5
|
||||
"p < 0.01 threshold", # 6
|
||||
"Test", # 7
|
||||
"#", # 8
|
||||
"Query", # 9
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[0] = None
|
||||
attrs[1] = None
|
||||
|
||||
text = tableStart('All Query Times')
|
||||
text = tableStart("All Query Times")
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
for r in rows:
|
||||
anchor = f'{currentTableAnchor()}.{r[7]}.{r[8]}'
|
||||
anchor = f"{currentTableAnchor()}.{r[7]}.{r[8]}"
|
||||
if int(r[1]):
|
||||
attrs[6] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[6] = ''
|
||||
attrs[6] = ""
|
||||
|
||||
if int(r[0]):
|
||||
if float(r[5]) > 0.:
|
||||
if float(r[5]) > 0.0:
|
||||
attrs[4] = attrs[5] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[4] = attrs[5] = f'style="background: {color_good}"'
|
||||
else:
|
||||
attrs[4] = attrs[5] = ''
|
||||
attrs[4] = attrs[5] = ""
|
||||
|
||||
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
|
||||
attrs[2] = f'style="background: {color_bad}"'
|
||||
attrs[3] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[2] = ''
|
||||
attrs[3] = ''
|
||||
attrs[2] = ""
|
||||
attrs[3] = ""
|
||||
|
||||
text += tableRow(r, attrs, anchor)
|
||||
|
||||
@ -655,7 +756,8 @@ elif args.report == 'all-queries':
|
||||
for t in tables:
|
||||
print(t)
|
||||
|
||||
print(f"""
|
||||
print(
|
||||
f"""
|
||||
</div>
|
||||
<p class="links">
|
||||
<a href="report.html">Main report</a>
|
||||
@ -665,4 +767,5 @@ elif args.report == 'all-queries':
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
@ -7,18 +7,19 @@ import csv
|
||||
|
||||
RESULT_LOG_NAME = "run.log"
|
||||
|
||||
|
||||
def process_result(result_folder):
|
||||
|
||||
status = "success"
|
||||
description = 'Server started and responded'
|
||||
description = "Server started and responded"
|
||||
summary = [("Smoke test", "OK")]
|
||||
with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log:
|
||||
lines = run_log.read().split('\n')
|
||||
if not lines or lines[0].strip() != 'OK':
|
||||
with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log:
|
||||
lines = run_log.read().split("\n")
|
||||
if not lines or lines[0].strip() != "OK":
|
||||
status = "failure"
|
||||
logging.info("Lines is not ok: %s", str('\n'.join(lines)))
|
||||
logging.info("Lines is not ok: %s", str("\n".join(lines)))
|
||||
summary = [("Smoke test", "FAIL")]
|
||||
description = 'Server failed to respond, see result in logs'
|
||||
description = "Server failed to respond, see result in logs"
|
||||
|
||||
result_logs = []
|
||||
server_log_path = os.path.join(result_folder, "clickhouse-server.log")
|
||||
@ -38,20 +39,22 @@ def process_result(result_folder):
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of split build smoke test")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of split build smoke test"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results, logs = process_result(args.in_results_dir)
|
||||
|
@ -10,11 +10,18 @@ def process_result(result_folder):
|
||||
status = "success"
|
||||
summary = []
|
||||
paths = []
|
||||
tests = ["TLPWhere", "TLPGroupBy", "TLPHaving", "TLPWhereGroupBy", "TLPDistinct", "TLPAggregate"]
|
||||
tests = [
|
||||
"TLPWhere",
|
||||
"TLPGroupBy",
|
||||
"TLPHaving",
|
||||
"TLPWhereGroupBy",
|
||||
"TLPDistinct",
|
||||
"TLPAggregate",
|
||||
]
|
||||
|
||||
for test in tests:
|
||||
err_path = '{}/{}.err'.format(result_folder, test)
|
||||
out_path = '{}/{}.out'.format(result_folder, test)
|
||||
err_path = "{}/{}.err".format(result_folder, test)
|
||||
out_path = "{}/{}.out".format(result_folder, test)
|
||||
if not os.path.exists(err_path):
|
||||
logging.info("No output err on path %s", err_path)
|
||||
summary.append((test, "SKIPPED"))
|
||||
@ -23,24 +30,24 @@ def process_result(result_folder):
|
||||
else:
|
||||
paths.append(err_path)
|
||||
paths.append(out_path)
|
||||
with open(err_path, 'r') as f:
|
||||
if 'AssertionError' in f.read():
|
||||
with open(err_path, "r") as f:
|
||||
if "AssertionError" in f.read():
|
||||
summary.append((test, "FAIL"))
|
||||
status = 'failure'
|
||||
status = "failure"
|
||||
else:
|
||||
summary.append((test, "OK"))
|
||||
|
||||
logs_path = '{}/logs.tar.gz'.format(result_folder)
|
||||
logs_path = "{}/logs.tar.gz".format(result_folder)
|
||||
if not os.path.exists(logs_path):
|
||||
logging.info("No logs tar on path %s", logs_path)
|
||||
else:
|
||||
paths.append(logs_path)
|
||||
stdout_path = '{}/stdout.log'.format(result_folder)
|
||||
stdout_path = "{}/stdout.log".format(result_folder)
|
||||
if not os.path.exists(stdout_path):
|
||||
logging.info("No stdout log on path %s", stdout_path)
|
||||
else:
|
||||
paths.append(stdout_path)
|
||||
stderr_path = '{}/stderr.log'.format(result_folder)
|
||||
stderr_path = "{}/stderr.log".format(result_folder)
|
||||
if not os.path.exists(stderr_path):
|
||||
logging.info("No stderr log on path %s", stderr_path)
|
||||
else:
|
||||
@ -52,20 +59,22 @@ def process_result(result_folder):
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of sqlancer test")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of sqlancer test"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results, logs = process_result(args.in_results_dir)
|
||||
|
@ -13,6 +13,17 @@ COPY s3downloader /s3downloader
|
||||
|
||||
ENV S3_URL="https://clickhouse-datasets.s3.yandex.net"
|
||||
ENV DATASETS="hits visits"
|
||||
ENV EXPORT_S3_STORAGE_POLICIES=1
|
||||
|
||||
# Download Minio-related binaries
|
||||
RUN arch=${TARGETARCH:-amd64} \
|
||||
&& wget "https://dl.min.io/server/minio/release/linux-${arch}/minio" \
|
||||
&& chmod +x ./minio \
|
||||
&& wget "https://dl.min.io/client/mc/release/linux-${arch}/mc" \
|
||||
&& chmod +x ./mc
|
||||
ENV MINIO_ROOT_USER="clickhouse"
|
||||
ENV MINIO_ROOT_PASSWORD="clickhouse"
|
||||
COPY setup_minio.sh /
|
||||
|
||||
COPY run.sh /
|
||||
CMD ["/bin/bash", "/run.sh"]
|
||||
|
@ -17,6 +17,8 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
|
||||
# install test configs
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
./setup_minio.sh
|
||||
|
||||
function start()
|
||||
{
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
@ -93,6 +95,8 @@ else
|
||||
clickhouse-client --query "SHOW TABLES FROM test"
|
||||
clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
|
||||
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
|
||||
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
|
||||
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits"
|
||||
fi
|
||||
|
||||
clickhouse-client --query "SHOW TABLES FROM test"
|
||||
|
66
docker/test/stateful/setup_minio.sh
Executable file
66
docker/test/stateful/setup_minio.sh
Executable file
@ -0,0 +1,66 @@
|
||||
#!/bin/bash
|
||||
|
||||
# TODO: Make this file shared with stateless tests
|
||||
#
|
||||
# Usage for local run:
|
||||
#
|
||||
# ./docker/test/stateful/setup_minio.sh ./tests/
|
||||
#
|
||||
|
||||
set -e -x -a -u
|
||||
|
||||
ls -lha
|
||||
|
||||
mkdir -p ./minio_data
|
||||
|
||||
if [ ! -f ./minio ]; then
|
||||
echo 'MinIO binary not found, downloading...'
|
||||
|
||||
BINARY_TYPE=$(uname -s | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
wget "https://dl.min.io/server/minio/release/${BINARY_TYPE}-amd64/minio" \
|
||||
&& chmod +x ./minio \
|
||||
&& wget "https://dl.min.io/client/mc/release/${BINARY_TYPE}-amd64/mc" \
|
||||
&& chmod +x ./mc
|
||||
fi
|
||||
|
||||
MINIO_ROOT_USER=${MINIO_ROOT_USER:-clickhouse}
|
||||
MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-clickhouse}
|
||||
|
||||
./minio server --address ":11111" ./minio_data &
|
||||
|
||||
while ! curl -v --silent http://localhost:11111 2>&1 | grep AccessDenied
|
||||
do
|
||||
echo "Trying to connect to minio"
|
||||
sleep 1
|
||||
done
|
||||
|
||||
lsof -i :11111
|
||||
|
||||
sleep 5
|
||||
|
||||
./mc alias set clickminio http://localhost:11111 clickhouse clickhouse
|
||||
./mc admin user add clickminio test testtest
|
||||
./mc admin policy set clickminio readwrite user=test
|
||||
./mc mb clickminio/test
|
||||
|
||||
|
||||
# Upload data to Minio. By default, after unpacking, all tests will be in
|
||||
# /usr/share/clickhouse-test/queries
|
||||
|
||||
TEST_PATH=${1:-/usr/share/clickhouse-test}
|
||||
MINIO_DATA_PATH=${TEST_PATH}/queries/1_stateful/data_minio
|
||||
|
||||
# Iterating over globs will cause the redundant FILE variable to be a path to a file, not a filename
|
||||
# shellcheck disable=SC2045
|
||||
for FILE in $(ls "${MINIO_DATA_PATH}"); do
|
||||
echo "$FILE";
|
||||
./mc cp "${MINIO_DATA_PATH}"/"$FILE" clickminio/test/"$FILE";
|
||||
done
|
||||
|
||||
mkdir -p ~/.aws
|
||||
cat <<EOT >> ~/.aws/credentials
|
||||
[default]
|
||||
aws_access_key_id=clickhouse
|
||||
aws_secret_access_key=clickhouse
|
||||
EOT
|
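Once setup_minio.sh has finished, the files from `data_minio` should be visible in the `test` bucket. A minimal local sanity check, assuming the `minio`/`mc` binaries were downloaded into the current directory as above (the file name in the second command is hypothetical):
```
# List the objects uploaded to the test bucket (the clickminio alias is configured by setup_minio.sh).
./mc ls clickminio/test
# Optionally copy one object back to verify round-trip access.
./mc cp clickminio/test/example.tsv ./example.tsv
```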
@ -60,6 +60,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
|
||||
ENV MINIO_ROOT_USER="clickhouse"
|
||||
ENV MINIO_ROOT_PASSWORD="clickhouse"
|
||||
ENV EXPORT_S3_STORAGE_POLICIES=1
|
||||
|
||||
COPY run.sh /
|
||||
COPY setup_minio.sh /
|
||||
|
@ -25,9 +25,11 @@ RUN apt-get update -y \
|
||||
brotli
|
||||
|
||||
COPY ./stress /stress
|
||||
COPY ./download_previous_release /download_previous_release
|
||||
COPY run.sh /
|
||||
|
||||
ENV DATASETS="hits visits"
|
||||
ENV S3_URL="https://clickhouse-datasets.s3.yandex.net"
|
||||
ENV EXPORT_S3_STORAGE_POLICIES=1
|
||||
|
||||
CMD ["/bin/bash", "/run.sh"]
|
||||
|
110
docker/test/stress/download_previous_release
Executable file
110
docker/test/stress/download_previous_release
Executable file
@ -0,0 +1,110 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import requests
|
||||
import re
|
||||
import os
|
||||
|
||||
from requests.adapters import HTTPAdapter
|
||||
from requests.packages.urllib3.util.retry import Retry
|
||||
|
||||
CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags"
|
||||
|
||||
CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb"
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static-dbg_{version}_amd64.deb"
|
||||
CLICKHOUSE_SERVER_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-server_{version}_all.deb"
|
||||
CLICKHOUSE_CLIENT_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-client_{version}_amd64.deb"
|
||||
|
||||
|
||||
CLICKHOUSE_COMMON_STATIC_PACKET_NAME = "clickhouse-common-static_{version}_amd64.deb"
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = "clickhouse-common-static-dbg_{version}_amd64.deb"
|
||||
CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_all.deb"
|
||||
CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_all.deb"
|
||||
|
||||
PACKETS_DIR = "previous_release_package_folder/"
|
||||
VERSION_PATTERN = r"((?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)"
|
||||
|
||||
|
||||
class Version:
|
||||
def __init__(self, version):
|
||||
self.version = version
|
||||
|
||||
def __lt__(self, other):
|
||||
return list(map(int, self.version.split('.'))) < list(map(int, other.version.split('.')))
|
||||
|
||||
def __str__(self):
|
||||
return self.version
|
||||
|
||||
|
||||
class ReleaseInfo:
|
||||
def __init__(self, version, release_type):
|
||||
self.version = version
|
||||
self.type = release_type
|
||||
|
||||
|
||||
def find_previous_release(server_version, releases):
|
||||
releases.sort(key=lambda x: x.version, reverse=True)
|
||||
for release in releases:
|
||||
if release.version < server_version:
|
||||
return True, release
|
||||
|
||||
return False, None
|
||||
|
||||
|
||||
def get_previous_release(server_version):
|
||||
page = 1
|
||||
found = False
|
||||
while not found:
|
||||
response = requests.get(CLICKHOUSE_TAGS_URL, {'page': page, 'per_page': 100})
|
||||
if not response.ok:
|
||||
raise Exception('Cannot load the list of tags from github: ' + response.reason)
|
||||
|
||||
releases_str = set(re.findall(VERSION_PATTERN, response.text))
|
||||
if len(releases_str) == 0:
|
||||
raise Exception('Cannot find previous release for ' + str(server_version) + ' server version')
|
||||
|
||||
releases = list(map(lambda x: ReleaseInfo(Version(x.split('-')[0]), x.split('-')[1]), releases_str))
|
||||
found, previous_release = find_previous_release(server_version, releases)
|
||||
page += 1
|
||||
|
||||
return previous_release
|
||||
|
||||
|
||||
def download_packet(url, local_file_name, retries=10, backoff_factor=0.3):
|
||||
session = requests.Session()
|
||||
retry = Retry(
|
||||
total=retries,
|
||||
read=retries,
|
||||
connect=retries,
|
||||
backoff_factor=backoff_factor,
|
||||
)
|
||||
adapter = HTTPAdapter(max_retries=retry)
|
||||
session.mount('http://', adapter)
|
||||
session.mount('https://', adapter)
|
||||
response = session.get(url)
|
||||
print(url)
|
||||
if response.ok:
|
||||
open(PACKETS_DIR + local_file_name, 'wb').write(response.content)
|
||||
|
||||
|
||||
def download_packets(release):
|
||||
if not os.path.exists(PACKETS_DIR):
|
||||
os.makedirs(PACKETS_DIR)
|
||||
|
||||
download_packet(CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL.format(version=release.version, type=release.type),
|
||||
CLICKHOUSE_COMMON_STATIC_PACKET_NAME.format(version=release.version))
|
||||
|
||||
download_packet(CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL.format(version=release.version, type=release.type),
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME.format(version=release.version))
|
||||
|
||||
download_packet(CLICKHOUSE_SERVER_DOWNLOAD_URL.format(version=release.version, type=release.type),
|
||||
CLICKHOUSE_SERVER_PACKET_NAME.format(version=release.version))
|
||||
|
||||
download_packet(CLICKHOUSE_CLIENT_DOWNLOAD_URL.format(version=release.version, type=release.type),
|
||||
CLICKHOUSE_CLIENT_PACKET_NAME.format(version=release.version))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
server_version = Version(input())
|
||||
previous_release = get_previous_release(server_version)
|
||||
download_packets(previous_release)
|
||||
|
@ -22,15 +22,19 @@ export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
|
||||
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
|
||||
|
||||
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
|
||||
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
|
||||
|
||||
|
||||
dpkg -i package_folder/clickhouse-common-static_*.deb
|
||||
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
|
||||
dpkg -i package_folder/clickhouse-server_*.deb
|
||||
dpkg -i package_folder/clickhouse-client_*.deb
|
||||
function install_packages()
|
||||
{
|
||||
dpkg -i $1/clickhouse-common-static_*.deb
|
||||
dpkg -i $1/clickhouse-common-static-dbg_*.deb
|
||||
dpkg -i $1/clickhouse-server_*.deb
|
||||
dpkg -i $1/clickhouse-client_*.deb
|
||||
}
|
||||
|
||||
function configure()
|
||||
{
|
||||
@ -116,7 +120,7 @@ function start()
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
if [ "$counter" -gt 240 ]
|
||||
if [ "$counter" -gt ${1:-240} ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
@ -127,6 +131,9 @@ function start()
|
||||
# use root to match with current uid
|
||||
clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
|
||||
sleep 0.5
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n200 /var/log/clickhouse-server/stderr.log
|
||||
tail -n200 /var/log/clickhouse-server/clickhouse-server.log
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
|
||||
@ -171,8 +178,12 @@ quit
|
||||
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
|
||||
}
|
||||
|
||||
install_packages package_folder
|
||||
|
||||
configure
|
||||
|
||||
./setup_minio.sh
|
||||
|
||||
start
|
||||
|
||||
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
|
||||
@ -188,6 +199,8 @@ clickhouse-client --query "SHOW TABLES FROM datasets"
|
||||
clickhouse-client --query "SHOW TABLES FROM test"
|
||||
clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
|
||||
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
|
||||
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
|
||||
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits"
|
||||
clickhouse-client --query "SHOW TABLES FROM test"
|
||||
|
||||
./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" \
|
||||
@ -242,6 +255,120 @@ zgrep -Fa "########################################" /test_output/* > /dev/null
|
||||
zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \
|
||||
&& echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv
|
||||
|
||||
echo -e "Backward compatibility check\n"
|
||||
|
||||
echo "Download previous release server"
|
||||
mkdir previous_release_package_folder
|
||||
clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'Download script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
|
||||
then
|
||||
echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/backward_compatibility_check_results.tsv
|
||||
stop
|
||||
|
||||
# Uninstall current packages
|
||||
dpkg --remove clickhouse-client
|
||||
dpkg --remove clickhouse-server
|
||||
dpkg --remove clickhouse-common-static-dbg
|
||||
dpkg --remove clickhouse-common-static
|
||||
|
||||
rm -rf /var/lib/clickhouse/*
|
||||
|
||||
# Install previous release packages
|
||||
install_packages previous_release_package_folder
|
||||
|
||||
# Start server from previous release
|
||||
configure
|
||||
start
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
|
||||
# Install new package before running stress test because we should use new clickhouse-client and new clickhouse-test
|
||||
install_packages package_folder
|
||||
|
||||
mkdir tmp_stress_output
|
||||
|
||||
./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
|
||||
&& echo -e 'Test script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'Test script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
|
||||
rm -rf tmp_stress_output
|
||||
|
||||
clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables"
|
||||
|
||||
stop
|
||||
|
||||
# Start new server
|
||||
configure
|
||||
start 500
|
||||
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'Server failed to start\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
|
||||
# Let the server run for a while before checking log.
|
||||
sleep 60
|
||||
|
||||
stop
|
||||
|
||||
# Error messages (we should ignore some errors)
|
||||
zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
|
||||
-e "Code: 236. DB::Exception: Cancelled mutating parts" \
|
||||
-e "REPLICA_IS_ALREADY_ACTIVE" \
|
||||
-e "REPLICA_IS_ALREADY_EXIST" \
|
||||
-e "DDLWorker: Cannot parse DDL task query" \
|
||||
-e "RaftInstance: failed to accept a rpc connection due to error 125" \
|
||||
-e "UNKNOWN_DATABASE" \
|
||||
-e "NETWORK_ERROR" \
|
||||
-e "UNKNOWN_TABLE" \
|
||||
-e "ZooKeeperClient" \
|
||||
-e "KEEPER_EXCEPTION" \
|
||||
-e "DirectoryMonitor" \
|
||||
-e "TABLE_IS_READ_ONLY" \
|
||||
-e "Code: 1000, e.code() = 111, Connection refused" \
|
||||
-e "UNFINISHED" \
|
||||
-e "Renaming unexpected part" \
|
||||
/var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "<Error>" > /dev/null \
|
||||
&& echo -e 'Error message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No Error messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
# Sanitizer asserts
|
||||
zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \
|
||||
&& echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No sanitizer asserts\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
rm -f /test_output/tmp
|
||||
|
||||
# OOM
|
||||
zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
|
||||
&& echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
# Logical errors
|
||||
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
|
||||
&& echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No logical errors\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
# Crash
|
||||
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
|
||||
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'Not crashed\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
# It also checks for crash without stacktrace (printed by watchdog)
|
||||
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
|
||||
&& echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
|
||||
else
|
||||
echo -e "Failed to download previous release packets\tFAIL" >> /test_output/backward_compatibility_check_results.tsv
|
||||
fi
|
||||
|
||||
zgrep -Fa "FAIL" /test_output/backward_compatibility_check_results.tsv > /dev/null \
|
||||
&& echo -e 'Backward compatibility check\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check\tOK' >> /test_output/test_results.tsv
|
||||
|
||||
|
||||
# Put logs into /test_output/
|
||||
for log_file in /var/log/clickhouse-server/clickhouse-server.log*
|
||||
do
|
||||
|
@ -47,7 +47,8 @@ def get_options(i):
|
||||
return ' '.join(options)
|
||||
|
||||
|
||||
def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_time_limit):
|
||||
def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_time_limit, backward_compatibility_check):
|
||||
backward_compatibility_check_option = '--backward-compatibility-check' if backward_compatibility_check else ''
|
||||
global_time_limit_option = ''
|
||||
if global_time_limit:
|
||||
global_time_limit_option = "--global_time_limit={}".format(global_time_limit)
|
||||
@ -56,7 +57,7 @@ def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_t
|
||||
pipes = []
|
||||
for i in range(0, len(output_paths)):
|
||||
f = open(output_paths[i], 'w')
|
||||
full_command = "{} {} {} {}".format(cmd, get_options(i), global_time_limit_option, skip_tests_option)
|
||||
full_command = "{} {} {} {} {}".format(cmd, get_options(i), global_time_limit_option, skip_tests_option, backward_compatibility_check_option)
|
||||
logging.info("Run func tests '%s'", full_command)
|
||||
p = Popen(full_command, shell=True, stdout=f, stderr=f)
|
||||
pipes.append(p)
|
||||
@ -168,6 +169,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--output-folder")
|
||||
parser.add_argument("--global-time-limit", type=int, default=1800)
|
||||
parser.add_argument("--num-parallel", type=int, default=cpu_count())
|
||||
parser.add_argument('--backward-compatibility-check', action='store_true')
|
||||
parser.add_argument('--hung-check', action='store_true', default=False)
|
||||
# make sense only for hung check
|
||||
parser.add_argument('--drop-databases', action='store_true', default=False)
|
||||
@ -176,7 +178,7 @@ if __name__ == "__main__":
|
||||
if args.drop_databases and not args.hung_check:
|
||||
raise Exception("--drop-databases only used in hung check (--hung-check)")
|
||||
func_pipes = []
|
||||
func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests, args.global_time_limit)
|
||||
func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests, args.global_time_limit, args.backward_compatibility_check)
|
||||
|
||||
logging.info("Will wait functests to finish")
|
||||
while True:
|
||||
|
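With the new `backward_compatibility_check` parameter wired through, the stress driver can be invoked the same way run.sh does earlier in this diff; a hedged sketch (the output folder, time limit, and skip option are illustrative only):
```
# Run the functional stress tests in backward compatibility mode.
./stress --backward-compatibility-check \
         --output-folder tmp_stress_output \
         --global-time-limit=1200 \
         --skip-func-tests "$SKIP_TESTS_OPTION"
```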
@ -16,7 +16,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
||||
python3-pip \
|
||||
shellcheck \
|
||||
yamllint \
|
||||
&& pip3 install codespell PyGithub boto3 unidiff dohq-artifactory
|
||||
&& pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff
|
||||
|
||||
# Architecture of the image when BuildKit/buildx is used
|
||||
ARG TARGETARCH
|
||||
|
@ -14,6 +14,7 @@ def process_result(result_folder):
|
||||
("header duplicates", "duplicate_output.txt"),
|
||||
("shellcheck", "shellcheck_output.txt"),
|
||||
("style", "style_output.txt"),
|
||||
("black", "black_output.txt"),
|
||||
("typos", "typos_output.txt"),
|
||||
("whitespaces", "whitespaces_output.txt"),
|
||||
("workflows", "workflows_output.txt"),
|
||||
|
@ -7,11 +7,13 @@ echo "Check duplicates" | ts
|
||||
./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt
|
||||
echo "Check style" | ts
|
||||
./check-style -n |& tee /test_output/style_output.txt
|
||||
echo "Check python formatting with black" | ts
|
||||
./check-black -n |& tee /test_output/black_output.txt
|
||||
echo "Check typos" | ts
|
||||
./check-typos |& tee /test_output/typos_output.txt
|
||||
echo "Check whitespaces" | ts
|
||||
./check-whitespaces -n |& tee /test_output/whitespaces_output.txt
|
||||
echo "Check sorkflows" | ts
|
||||
echo "Check workflows" | ts
|
||||
./check-workflows |& tee /test_output/workflows_output.txt
|
||||
echo "Check shell scripts with shellcheck" | ts
|
||||
./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt
|
||||
|
@ -22,9 +22,9 @@ def process_result(result_folder):
|
||||
total_other = 0
|
||||
test_results = []
|
||||
for test in results["tests"]:
|
||||
test_name = test['test']['test_name']
|
||||
test_result = test['result']['result_type'].upper()
|
||||
test_time = str(test['result']['message_rtime'])
|
||||
test_name = test["test"]["test_name"]
|
||||
test_result = test["result"]["result_type"].upper()
|
||||
test_time = str(test["result"]["message_rtime"])
|
||||
total_tests += 1
|
||||
if test_result == "OK":
|
||||
total_ok += 1
|
||||
@ -39,24 +39,29 @@ def process_result(result_folder):
|
||||
else:
|
||||
status = "success"
|
||||
|
||||
description = "failed: {}, passed: {}, other: {}".format(total_fail, total_ok, total_other)
|
||||
description = "failed: {}, passed: {}, other: {}".format(
|
||||
total_fail, total_ok, total_other
|
||||
)
|
||||
return status, description, test_results, [json_path, test_binary_log]
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of Testflows tests")
|
||||
parser.add_argument("--in-results-dir", default='./')
|
||||
parser.add_argument("--out-results-file", default='./test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='./check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of Testflows tests"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="./")
|
||||
parser.add_argument("--out-results-file", default="./test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="./check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results, logs = process_result(args.in_results_dir)
|
||||
@ -64,4 +69,3 @@ if __name__ == "__main__":
|
||||
status = (state, description)
|
||||
write_results(args.out_results_file, args.out_status_file, test_results, status)
|
||||
logging.info("Result written")
|
||||
|
||||
|
@ -5,24 +5,26 @@ import logging
|
||||
import argparse
|
||||
import csv
|
||||
|
||||
OK_SIGN = 'OK ]'
|
||||
FAILED_SIGN = 'FAILED ]'
|
||||
SEGFAULT = 'Segmentation fault'
|
||||
SIGNAL = 'received signal SIG'
|
||||
PASSED = 'PASSED'
|
||||
OK_SIGN = "OK ]"
|
||||
FAILED_SIGN = "FAILED ]"
|
||||
SEGFAULT = "Segmentation fault"
|
||||
SIGNAL = "received signal SIG"
|
||||
PASSED = "PASSED"
|
||||
|
||||
|
||||
def get_test_name(line):
|
||||
elements = reversed(line.split(' '))
|
||||
elements = reversed(line.split(" "))
|
||||
for element in elements:
|
||||
if '(' not in element and ')' not in element:
|
||||
if "(" not in element and ")" not in element:
|
||||
return element
|
||||
raise Exception("No test name in line '{}'".format(line))
|
||||
|
||||
|
||||
def process_result(result_folder):
|
||||
summary = []
|
||||
total_counter = 0
|
||||
failed_counter = 0
|
||||
result_log_path = '{}/test_result.txt'.format(result_folder)
|
||||
result_log_path = "{}/test_result.txt".format(result_folder)
|
||||
if not os.path.exists(result_log_path):
|
||||
logging.info("No output log on path %s", result_log_path)
|
||||
return "exception", "No output log", []
|
||||
@ -30,7 +32,7 @@ def process_result(result_folder):
|
||||
status = "success"
|
||||
description = ""
|
||||
passed = False
|
||||
with open(result_log_path, 'r') as test_result:
|
||||
with open(result_log_path, "r") as test_result:
|
||||
for line in test_result:
|
||||
if OK_SIGN in line:
|
||||
logging.info("Found ok line: '%s'", line)
|
||||
@ -38,7 +40,7 @@ def process_result(result_folder):
|
||||
logging.info("Test name: '%s'", test_name)
|
||||
summary.append((test_name, "OK"))
|
||||
total_counter += 1
|
||||
elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line:
|
||||
elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line:
|
||||
logging.info("Found fail line: '%s'", line)
|
||||
test_name = get_test_name(line.strip())
|
||||
logging.info("Test name: '%s'", test_name)
|
||||
@ -67,25 +69,30 @@ def process_result(result_folder):
|
||||
status = "failure"
|
||||
|
||||
if not description:
|
||||
description += "fail: {}, passed: {}".format(failed_counter, total_counter - failed_counter)
|
||||
description += "fail: {}, passed: {}".format(
|
||||
failed_counter, total_counter - failed_counter
|
||||
)
|
||||
|
||||
return status, description, summary
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of unit tests")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of unit tests"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results = process_result(args.in_results_dir)
|
||||
@ -93,4 +100,3 @@ if __name__ == "__main__":
|
||||
status = (state, description)
|
||||
write_results(args.out_results_file, args.out_status_file, test_results, status)
|
||||
logging.info("Result written")
|
||||
|
||||
|
@ -16,6 +16,7 @@ NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"]
|
||||
|
||||
RETRIES_SIGN = "Some tests were restarted"
|
||||
|
||||
|
||||
def process_test_log(log_path):
|
||||
total = 0
|
||||
skipped = 0
|
||||
@ -26,7 +27,7 @@ def process_test_log(log_path):
|
||||
retries = False
|
||||
task_timeout = True
|
||||
test_results = []
|
||||
with open(log_path, 'r') as test_file:
|
||||
with open(log_path, "r") as test_file:
|
||||
for line in test_file:
|
||||
original_line = line
|
||||
line = line.strip()
|
||||
@ -36,12 +37,15 @@ def process_test_log(log_path):
|
||||
hung = True
|
||||
if RETRIES_SIGN in line:
|
||||
retries = True
|
||||
if any(sign in line for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)):
|
||||
test_name = line.split(' ')[2].split(':')[0]
|
||||
if any(
|
||||
sign in line
|
||||
for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)
|
||||
):
|
||||
test_name = line.split(" ")[2].split(":")[0]
|
||||
|
||||
test_time = ''
|
||||
test_time = ""
|
||||
try:
|
||||
time_token = line.split(']')[1].strip().split()[0]
|
||||
time_token = line.split("]")[1].strip().split()[0]
|
||||
float(time_token)
|
||||
test_time = time_token
|
||||
except:
|
||||
@ -66,9 +70,22 @@ def process_test_log(log_path):
|
||||
elif len(test_results) > 0 and test_results[-1][1] == "FAIL":
|
||||
test_results[-1][3].append(original_line)
|
||||
|
||||
test_results = [(test[0], test[1], test[2], ''.join(test[3])) for test in test_results]
|
||||
test_results = [
|
||||
(test[0], test[1], test[2], "".join(test[3])) for test in test_results
|
||||
]
|
||||
|
||||
return (
|
||||
total,
|
||||
skipped,
|
||||
unknown,
|
||||
failed,
|
||||
success,
|
||||
hung,
|
||||
task_timeout,
|
||||
retries,
|
||||
test_results,
|
||||
)
|
||||
|
||||
return total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results
|
||||
|
||||
def process_result(result_path):
|
||||
test_results = []
|
||||
@ -76,16 +93,26 @@ def process_result(result_path):
|
||||
description = ""
|
||||
files = os.listdir(result_path)
|
||||
if files:
|
||||
logging.info("Find files in result folder %s", ','.join(files))
|
||||
result_path = os.path.join(result_path, 'test_result.txt')
|
||||
logging.info("Find files in result folder %s", ",".join(files))
|
||||
result_path = os.path.join(result_path, "test_result.txt")
|
||||
else:
|
||||
result_path = None
|
||||
description = "No output log"
|
||||
state = "error"
|
||||
|
||||
if result_path and os.path.exists(result_path):
|
||||
total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results = process_test_log(result_path)
|
||||
is_flacky_check = 1 < int(os.environ.get('NUM_TRIES', 1))
|
||||
(
|
||||
total,
|
||||
skipped,
|
||||
unknown,
|
||||
failed,
|
||||
success,
|
||||
hung,
|
||||
task_timeout,
|
||||
retries,
|
||||
test_results,
|
||||
) = process_test_log(result_path)
|
||||
is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1))
|
||||
logging.info("Is flacky check: %s", is_flacky_check)
|
||||
# If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
|
||||
# But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped.
|
||||
@ -120,20 +147,22 @@ def process_result(result_path):
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of functional tests")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of functional tests"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results = process_result(args.in_results_dir)
|
||||
|
@ -38,7 +38,7 @@ Writing the docs is extremely useful for project's users and developers, and gro
|
||||
|
||||
The documentation covers all aspects of the ClickHouse lifecycle: developing, testing, installing, operating, and using. The base language of the documentation is English, and the English version is the most up to date. All other languages are supported as far as possible by contributors from different countries.
|
||||
|
||||
At the moment, [documentation](https://clickhouse.com/docs) exists in English, Russian, Chinese, Japanese, and Farsi. We store the documentation besides the ClickHouse source code in the [GitHub repository](https://github.com/ClickHouse/ClickHouse/tree/master/docs).
|
||||
At the moment, [documentation](https://clickhouse.com/docs) exists in English, Russian, Chinese, Japanese. We store the documentation besides the ClickHouse source code in the [GitHub repository](https://github.com/ClickHouse/ClickHouse/tree/master/docs).
|
||||
|
||||
Each language lives in its own folder. Files that are not translated from English are symbolic links to the English ones.
|
||||
|
||||
|
@ -1,11 +1,11 @@
|
||||
sudo apt-get install apt-transport-https ca-certificates dirmngr
|
||||
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4
|
||||
sudo apt-get install -y apt-transport-https ca-certificates dirmngr
|
||||
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
|
||||
|
||||
echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \
|
||||
echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \
|
||||
/etc/apt/sources.list.d/clickhouse.list
|
||||
sudo apt-get update
|
||||
|
||||
sudo apt-get install -y clickhouse-server clickhouse-client
|
||||
|
||||
sudo service clickhouse-server start
|
||||
clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
||||
clickhouse-client # or "clickhouse-client --password" if you've set up a password.
|
||||
|
11
docs/_includes/install/deb_repo.sh
Normal file
11
docs/_includes/install/deb_repo.sh
Normal file
@ -0,0 +1,11 @@
|
||||
sudo apt-get install apt-transport-https ca-certificates dirmngr
|
||||
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4
|
||||
|
||||
echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \
|
||||
/etc/apt/sources.list.d/clickhouse.list
|
||||
sudo apt-get update
|
||||
|
||||
sudo apt-get install -y clickhouse-server clickhouse-client
|
||||
|
||||
sudo service clickhouse-server start
|
||||
clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
@ -1,7 +1,6 @@
|
||||
sudo yum install yum-utils
|
||||
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
|
||||
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo
|
||||
sudo yum install clickhouse-server clickhouse-client
|
||||
sudo yum install -y yum-utils
|
||||
sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
|
||||
sudo yum install -y clickhouse-server clickhouse-client
|
||||
|
||||
sudo /etc/init.d/clickhouse-server start
|
||||
clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
||||
|
7
docs/_includes/install/rpm_repo.sh
Normal file
7
docs/_includes/install/rpm_repo.sh
Normal file
@ -0,0 +1,7 @@
|
||||
sudo yum install yum-utils
|
||||
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
|
||||
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo
|
||||
sudo yum install clickhouse-server clickhouse-client
|
||||
|
||||
sudo /etc/init.d/clickhouse-server start
|
||||
clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
@ -1,19 +1,20 @@
|
||||
export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \
|
||||
LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \
|
||||
grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1)
|
||||
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz
|
||||
export LATEST_VERSION
|
||||
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz"
|
||||
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz"
|
||||
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz"
|
||||
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz"
|
||||
|
||||
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
|
||||
tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz"
|
||||
sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh"
|
||||
|
||||
tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
|
||||
tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz"
|
||||
sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh"
|
||||
|
||||
tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
|
||||
tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz"
|
||||
sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh"
|
||||
sudo /etc/init.d/clickhouse-server start
|
||||
|
||||
tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
|
||||
tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz"
|
||||
sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh"
|
||||
|
19
docs/_includes/install/tgz_repo.sh
Normal file
19
docs/_includes/install/tgz_repo.sh
Normal file
@ -0,0 +1,19 @@
|
||||
export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \
|
||||
grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1)
|
||||
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz
|
||||
|
||||
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
|
||||
|
||||
tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
|
||||
|
||||
tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
|
||||
sudo /etc/init.d/clickhouse-server start
|
||||
|
||||
tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
|
@ -5,7 +5,7 @@ toc_title: Source Code Browser
|
||||
|
||||
# Browse ClickHouse Source Code {#browse-clickhouse-source-code}
|
||||
|
||||
You can use **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/html_report/ClickHouse/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily.
|
||||
You can use **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily.
|
||||
|
||||
Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual.
|
||||
|
||||
|
@ -156,14 +156,6 @@ $ cd ClickHouse
|
||||
$ ./release
|
||||
```
|
||||
|
||||
## Faster builds for development
|
||||
|
||||
Normally all tools of the ClickHouse bundle, such as `clickhouse-server`, `clickhouse-client` etc., are linked into a single static executable, `clickhouse`. This executable must be re-linked on every change, which might be slow. One common way to improve build time is to use the 'split' build configuration, which builds a separate binary for every tool, and further splits the code into several shared libraries. To enable this tweak, pass the following flags to `cmake`:
|
||||
|
||||
```
|
||||
-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1
|
||||
```
|
||||
|
||||
## You Don’t Have to Build ClickHouse {#you-dont-have-to-build-clickhouse}
|
||||
|
||||
ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour.
|
||||
@ -172,9 +164,9 @@ They are built for stable, prestable and testing releases as long as for every c
|
||||
|
||||
To find the freshest build from `master`, go to [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green checkmark or red cross near commit, and click to the “Details” link right after “ClickHouse Build Check”.
|
||||
|
||||
## Split build configuration {#split-build}
|
||||
## Faster builds for development: Split build configuration {#split-build}
|
||||
|
||||
Normally ClickHouse is statically linked into a single static `clickhouse` binary with minimal dependencies. This is convenient for distribution, but it means that on every change the entire binary is linked again, which is slow and may be inconvenient for development. There is an alternative configuration which creates dynamically loaded shared libraries instead, allowing faster incremental builds. To use it, add the following flags to your `cmake` invocation:
|
||||
Normally, ClickHouse is statically linked into a single static `clickhouse` binary with minimal dependencies. This is convenient for distribution, but it means that on every change the entire binary needs to be linked, which is slow and may be inconvenient for development. There is an alternative configuration which instead creates dynamically loaded shared libraries and separate binaries `clickhouse-server`, `clickhouse-client` etc., allowing for faster incremental builds. To use it, add the following flags to your `cmake` invocation:
|
||||
```
|
||||
-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1
|
||||
```
|
||||
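For reference, here is a minimal configure-and-build sketch using these flags; the build directory name, the Ninja generator and the per-tool target names are assumptions, adjust them to your environment:

``` bash
# from the root of a ClickHouse checkout
mkdir -p build_split && cd build_split
cmake .. -G Ninja -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1
# rebuild only the tools you actually need for the current change
ninja clickhouse-server clickhouse-client
```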
|
@ -71,6 +71,8 @@ This check means that the CI system started to process the pull request. When it
|
||||
Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally).
|
||||
If it fails, fix the style errors following the [code style guide](style.md).
|
||||
|
||||
Python code is checked with [black](https://github.com/psf/black/).
|
||||
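A hedged sketch of running these checks locally before pushing (run from the repository root; the directories passed to `black` are assumptions):

``` bash
# regex-based style checks
./utils/check-style/check-style

# Python formatting check
python3 -m black --check tests/ utils/
```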
|
||||
### Report Details
|
||||
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
|
||||
- `output.txt` contains the check resulting errors (invalid tabulation etc), blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt).
|
||||
@ -190,15 +192,3 @@ Runs randomly generated queries to catch program errors. If it fails, ask a main
|
||||
|
||||
## Performance Tests
|
||||
Measure changes in query performance. This is the longest check that takes just below 6 hours to run. The performance test report is described in detail [here](https://github.com/ClickHouse/ClickHouse/tree/master/docker/test/performance-comparison#how-to-read-the-report).
|
||||
|
||||
|
||||
|
||||
# QA
|
||||
|
||||
> What is a `Task (private network)` item on status pages?
|
||||
|
||||
It's a link to the Yandex's internal job system. Yandex employees can see the check's start time and its more verbose status.
|
||||
|
||||
> Where the tests are run
|
||||
|
||||
Somewhere on Yandex internal infrastructure.
|
||||
|
@ -40,8 +40,8 @@ The list of third-party libraries:
|
||||
| grpc | [Apache](https://github.com/ClickHouse-Extras/grpc/blob/60c986e15cae70aade721d26badabab1f822fdd6/LICENSE) |
|
||||
| h3 | [Apache](https://github.com/ClickHouse-Extras/h3/blob/c7f46cfd71fb60e2fefc90e28abe81657deff735/LICENSE) |
|
||||
| hyperscan | [Boost](https://github.com/ClickHouse-Extras/hyperscan/blob/e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa/LICENSE) |
|
||||
| icu | [Public Domain](https://github.com/unicode-org/icu/blob/faa2f9f9e1fe74c5ed00eba371d2830134cdbea1/icu4c/LICENSE) |
|
||||
| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/f020820388e3faafb44cc643574a2d563dfde572/LICENSE) |
|
||||
| icu | [Public Domain](https://github.com/unicode-org/icu/blob/a56dde820dc35665a66f2e9ee8ba58e75049b668/icu4c/LICENSE) |
|
||||
| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/72d9a4a7febc904e2b0a534ccb25ae40fac5f1e5/LICENSE) |
|
||||
| jemalloc | [BSD 2-clause](https://github.com/ClickHouse-Extras/jemalloc/blob/e6891d9746143bf2cf617493d880ba5a0b9a3efd/COPYING) |
|
||||
| krb5 | [MIT](https://github.com/ClickHouse-Extras/krb5/blob/5149dea4e2be0f67707383d2682b897c14631374/src/lib/gssapi/LICENSE) |
|
||||
| libc-headers | [LGPL](https://github.com/ClickHouse-Extras/libc-headers/blob/a720b7105a610acbd7427eea475a5b6810c151eb/LICENSE) |
|
||||
|
@ -229,6 +229,25 @@ As simple code editors, you can use Sublime Text or Visual Studio Code, or Kate
|
||||
|
||||
Just in case, it is worth mentioning that CLion creates the `build` path on its own, selects `debug` as the build type on its own, uses for configuration the version of CMake that is defined in CLion rather than the one installed by you, and finally runs build tasks with `make` instead of `ninja`. This is normal behaviour, just keep it in mind to avoid confusion.
|
||||
|
||||
## Debugging
|
||||
|
||||
Many graphical IDEs offer an integrated debugger, but you can also use a standalone debugger.
|
||||
|
||||
### GDB
|
||||
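A minimal GDB sketch, analogous to the LLDB commands below; the source path and the breakpoint are assumptions:

``` bash
gdb ./clickhouse-client
# inside gdb:
#   (gdb) directory /path/to/source/dir        # tell GDB where to find the source code
#   (gdb) break main                           # <set breakpoints here>
#   (gdb) run --query="SELECT * FROM TAB"
```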
|
||||
### LLDB
|
||||
|
||||
# tell LLDB where to find the source code
|
||||
settings set target.source-map /path/to/build/dir /path/to/source/dir
|
||||
|
||||
# configure LLDB to display code before/after currently executing line
|
||||
settings set stop-line-count-before 10
|
||||
settings set stop-line-count-after 10
|
||||
|
||||
target create ./clickhouse-client
|
||||
# <set breakpoints here>
|
||||
process launch -- --query="SELECT * FROM TAB"
|
||||
|
||||
## Writing Code {#writing-code}
|
||||
|
||||
The description of ClickHouse architecture can be found here: https://clickhouse.com/docs/en/development/architecture/
|
||||
@ -243,7 +262,7 @@ List of tasks: https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3A
|
||||
|
||||
## Test Data {#test-data}
|
||||
|
||||
Developing ClickHouse often requires loading realistic datasets. It is particularly important for performance testing. We have a specially prepared set of anonymized data from Yandex.Metrica. It requires additionally some 3GB of free disk space. Note that this data is not required to accomplish most of the development tasks.
|
||||
Developing ClickHouse often requires loading realistic datasets. It is particularly important for performance testing. We have a specially prepared set of anonymized web analytics data. It additionally requires some 3GB of free disk space. Note that this data is not required to accomplish most of the development tasks.
|
||||
|
||||
sudo apt install wget xz-utils
|
||||
|
||||
@ -270,7 +289,7 @@ Navigate to your fork repository in GitHub’s UI. If you have been developing i
|
||||
|
||||
A pull request can be created even if the work is not completed yet. In this case please put the word “WIP” (work in progress) at the beginning of the title, it can be changed later. This is useful for cooperative reviewing and discussion of changes as well as for running all of the available tests. It is important that you provide a brief description of your changes, it will later be used for generating release changelogs.
|
||||
|
||||
Testing will commence as soon as Yandex employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour.
|
||||
Testing will commence as soon as ClickHouse employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour.
|
||||
|
||||
The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “ClickHouse build check” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear).
|
||||
|
||||
|
@ -322,7 +322,7 @@ std::string getName() const override { return "Memory"; }
|
||||
class StorageMemory : public IStorage
|
||||
```
|
||||
|
||||
**4.** `using` are named the same way as classes, or with `_t` on the end.
|
||||
**4.** `using` are named the same way as classes.
|
||||
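For example (the aliases below are purely illustrative):

``` cpp
using ColumnNames = std::vector<std::string>;
using StoragePtr = std::shared_ptr<IStorage>;
```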
|
||||
**5.** Names of template type arguments: in simple cases, use `T`; `T`, `U`; `T1`, `T2`.
|
||||
|
||||
@ -404,9 +404,9 @@ enum class CompressionMethod
|
||||
};
|
||||
```
|
||||
|
||||
**15.** All names must be in English. Transliteration of Russian words is not allowed.
|
||||
**15.** All names must be in English. Transliteration of Hebrew words is not allowed.
|
||||
|
||||
not Stroka
|
||||
not T_PAAMAYIM_NEKUDOTAYIM
|
||||
|
||||
**16.** Abbreviations are acceptable if they are well known (when you can easily find the meaning of the abbreviation in Wikipedia or in a search engine).
|
||||
|
||||
@ -490,7 +490,7 @@ if (0 != close(fd))
|
||||
throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE);
|
||||
```
|
||||
|
||||
`Do not use assert`.
|
||||
You can use assert to check invariants in code.
|
||||
|
||||
**4.** Exception types.
|
||||
|
||||
@ -571,7 +571,7 @@ Don’t use these types for numbers: `signed/unsigned long`, `long long`, `short
|
||||
|
||||
**13.** Passing arguments.
|
||||
|
||||
Pass complex values by reference (including `std::string`).
|
||||
Pass complex values by value if they are going to be moved, and use `std::move`; pass by reference if you want to update a value in a loop.
|
||||
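A short sketch of these conventions (the names are invented for illustration):

``` cpp
/// The value will be moved from: take it by value and std::move it into place.
void setQuery(std::string query_) { query = std::move(query_); }

/// The callee updates the value inside a loop: take it by reference.
void appendToBlock(Block & block) const;

/// The function takes ownership of a heap-allocated object: take a smart pointer.
void addStorage(std::unique_ptr<IStorage> storage);
```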
|
||||
If a function captures ownership of an object created in the heap, make the argument type `shared_ptr` or `unique_ptr`.
|
||||
|
||||
@ -581,7 +581,7 @@ In most cases, just use `return`. Do not write `return std::move(res)`.
|
||||
|
||||
If the function allocates an object on heap and returns it, use `shared_ptr` or `unique_ptr`.
|
||||
|
||||
In rare cases you might need to return the value via an argument. In this case, the argument should be a reference.
|
||||
In rare cases (updating a value in a loop) you might need to return the value via an argument. In this case, the argument should be a reference.
|
||||
|
||||
``` cpp
|
||||
using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
|
||||
|
@ -11,7 +11,7 @@ Functional tests are the most simple and convenient to use. Most of ClickHouse f
|
||||
|
||||
Each functional test sends one or multiple queries to the running ClickHouse server and compares the result with reference.
|
||||
|
||||
Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and it is available to general public.
|
||||
Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse, and it is available to the general public.
|
||||
|
||||
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
|
||||
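For illustration, a minimal sketch of what a stateless `.sql` test could look like (the file name is hypothetical; the expected output goes into a matching `.reference` file):

``` sql
-- tests/queries/0_stateless/99999_toy_example.sql (hypothetical)
DROP TABLE IF EXISTS t_toy;
CREATE TABLE t_toy (x UInt32) ENGINE = Memory;
INSERT INTO t_toy VALUES (1), (2), (3);
SELECT sum(x) FROM t_toy;
DROP TABLE t_toy;
```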
|
||||
@ -133,44 +133,6 @@ If the system clickhouse-server is already running and you do not want to stop i
|
||||
|
||||
The `clickhouse` binary has almost no dependencies and works across a wide range of Linux distributions. For a quick and dirty test of your changes on a server, you can simply `scp` your freshly built `clickhouse` binary to the server and then run it as in the examples above.
|
||||
|
||||
## Testing Environment {#testing-environment}
|
||||
|
||||
Before publishing release as stable we deploy it on testing environment. Testing environment is a cluster that process 1/39 part of [Yandex.Metrica](https://metrica.yandex.com/) data. We share our testing environment with Yandex.Metrica team. ClickHouse is upgraded without downtime on top of existing data. We look at first that data is processed successfully without lagging from realtime, the replication continue to work and there is no issues visible to Yandex.Metrica team. First check can be done in the following way:
|
||||
|
||||
``` sql
|
||||
SELECT hostName() AS h, any(version()), any(uptime()), max(UTCEventTime), count() FROM remote('example01-01-{1..3}t', merge, hits) WHERE EventDate >= today() - 2 GROUP BY h ORDER BY h;
|
||||
```
|
||||
|
||||
In some cases we also deploy to testing environment of our friend teams in Yandex: Market, Cloud, etc. Also we have some hardware servers that are used for development purposes.
|
||||
|
||||
## Load Testing {#load-testing}
|
||||
|
||||
After deploying to testing environment we run load testing with queries from production cluster. This is done manually.
|
||||
|
||||
Make sure you have enabled `query_log` on your production cluster.
|
||||
|
||||
Collect query log for a day or more:
|
||||
|
||||
``` bash
|
||||
$ clickhouse-client --query="SELECT DISTINCT query FROM system.query_log WHERE event_date = today() AND query LIKE '%ym:%' AND query NOT LIKE '%system.query_log%' AND type = 2 AND is_initial_query" > queries.tsv
|
||||
```
|
||||
|
||||
This is a way complicated example. `type = 2` will filter queries that are executed successfully. `query LIKE '%ym:%'` is to select relevant queries from Yandex.Metrica. `is_initial_query` is to select only queries that are initiated by client, not by ClickHouse itself (as parts of distributed query processing).
|
||||
|
||||
`scp` this log to your testing cluster and run it as following:
|
||||
|
||||
``` bash
|
||||
$ clickhouse benchmark --concurrency 16 < queries.tsv
|
||||
```
|
||||
|
||||
(probably you also want to specify a `--user`)
|
||||
|
||||
Then leave it for a night or weekend and go take a rest.
|
||||
|
||||
You should check that `clickhouse-server` does not crash, memory footprint is bounded and performance not degrading over time.
|
||||
|
||||
Precise query execution timings are not recorded and not compared due to high variability of queries and environment.
|
||||
|
||||
## Build Tests {#build-tests}
|
||||
|
||||
Build tests allow checking that the build is not broken on various alternative configurations and on some foreign systems. These tests are automated as well.
|
||||
@ -259,13 +221,13 @@ Thread Fuzzer (please don't mix up with Thread Sanitizer) is another kind of fuz
|
||||
|
||||
## Security Audit
|
||||
|
||||
People from Yandex Security Team did some basic overview of ClickHouse capabilities from the security standpoint.
|
||||
Our Security Team did some basic overview of ClickHouse capabilities from the security standpoint.
|
||||
|
||||
## Static Analyzers {#static-analyzers}
|
||||
|
||||
We run `clang-tidy` on per-commit basis. `clang-static-analyzer` checks are also enabled. `clang-tidy` is also used for some style checks.
|
||||
|
||||
We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`, `CodeQL`. You will find instructions for usage in `tests/instructions/` directory. Also you can read [the article in russian](https://habr.com/company/yandex/blog/342018/).
|
||||
We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`, `CodeQL`. You will find instructions for usage in `tests/instructions/` directory.
|
||||
|
||||
If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of the box.
|
||||
|
||||
@ -310,12 +272,6 @@ Alternatively you can try `uncrustify` tool to reformat your code. Configuration
|
||||
|
||||
We also use `codespell` to find typos in code. It is automated as well.
|
||||
|
||||
## Metrica B2B Tests {#metrica-b2b-tests}
|
||||
|
||||
Each ClickHouse release is tested with Yandex Metrica and AppMetrica engines. Testing and stable versions of ClickHouse are deployed on VMs and run with a small copy of Metrica engine that is processing fixed sample of input data. Then results of two instances of Metrica engine are compared together.
|
||||
|
||||
These tests are automated by a separate team. Due to the high number of moving parts, the tests fail most of the time for completely unrelated reasons that are very difficult to figure out. Most likely these tests have negative value for us. Nevertheless, these tests have proved to be useful in about one or two cases out of hundreds.
|
||||
|
||||
## Test Coverage {#test-coverage}
|
||||
|
||||
We also track test coverage but only for functional tests and only for clickhouse-server. It is performed on daily basis.
|
||||
|
@ -36,6 +36,7 @@ ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'passwo
|
||||
- `max_flush_data_time` — Maximum number of milliseconds that data is allowed to be cached in memory (for the database; the cached data cannot be queried yet). When this time is exceeded, the data will be materialized. Default: `1000`.
|
||||
- `max_wait_time_when_mysql_unavailable` — Retry interval when MySQL is not available (milliseconds). Negative value disables retry. Default: `1000`.
|
||||
- `allows_query_when_mysql_lost` — Allows to query a materialized table when MySQL is lost. Default: `0` (`false`).
|
||||
- `materialized_mysql_tables_list` — a comma-separated list of MySQL database tables that will be replicated by the MaterializedMySQL database engine. Default value: empty list — meaning all tables will be replicated.
|
||||
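For example, a hedged sketch of limiting replication to specific tables; the exact value format is an assumption based on the comma-separated description above:

``` sql
CREATE DATABASE mysql_repl
ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***')
SETTINGS materialized_mysql_tables_list = 'orders,customers';
```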
|
||||
```sql
|
||||
CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***')
|
||||
@ -75,7 +76,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](
|
||||
| FLOAT | [Float32](../../sql-reference/data-types/float.md) |
|
||||
| DOUBLE | [Float64](../../sql-reference/data-types/float.md) |
|
||||
| DECIMAL, NEWDECIMAL | [Decimal](../../sql-reference/data-types/decimal.md) |
|
||||
| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) |
|
||||
| DATE, NEWDATE | [Date32](../../sql-reference/data-types/date32.md) |
|
||||
| DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |
|
||||
| DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) |
|
||||
| YEAR | [UInt16](../../sql-reference/data-types/int-uint.md) |
|
||||
|
@ -49,6 +49,8 @@ ENGINE = MySQL('host:port', ['database' | database], 'user', 'password')
|
||||
|
||||
All other MySQL data types are converted into [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
Because the ClickHouse `Date` type has a different range from the MySQL date range, if a MySQL date value is out of the range of the ClickHouse `Date` type, you can use the setting `mysql_datatypes_support_level` to modify the mapping from the MySQL date type to the ClickHouse date type: `date2Date32` (convert MySQL's date type to ClickHouse `Date32`), `date2String` (convert MySQL's date type to ClickHouse `String`; this is usually used when your MySQL data contains dates earlier than 1925), or `default` (convert MySQL's date type to ClickHouse `Date`).
|
||||
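For instance, a minimal sketch, assuming the setting can be applied at the session level before creating the `MySQL` table:

``` sql
SET mysql_datatypes_support_level = 'date2Date32';
```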
|
||||
[Nullable](../../sql-reference/data-types/nullable.md) is supported.
|
||||
|
||||
## Global Variables Support {#global-variables-support}
|
||||
|
@ -186,6 +186,7 @@ Example:
|
||||
- `_key` — Key of the message.
|
||||
- `_offset` — Offset of the message.
|
||||
- `_timestamp` — Timestamp of the message.
|
||||
- `_timestamp_ms` — Timestamp in milliseconds of the message.
|
||||
- `_partition` — Partition of Kafka topic.
|
||||
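The virtual columns above are typically captured through a materialized view; a hedged sketch where the Kafka table `kafka_queue`, the destination table `kafka_messages` and the `payload` column are assumptions:

``` sql
CREATE MATERIALIZED VIEW kafka_consumer TO kafka_messages AS
SELECT
    payload,
    _key,
    _partition,
    _offset,
    _timestamp_ms
FROM kafka_queue;
```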
|
||||
**See Also**
|
||||
|
@ -26,7 +26,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32)
|
||||
|
||||
``` sql
|
||||
CREATE TABLE s3_engine_table (name String, value UInt32)
|
||||
ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'gzip')
|
||||
ENGINE=S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'gzip')
|
||||
SETTINGS input_format_with_names_use_header = 0;
|
||||
|
||||
INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3);
|
||||
@ -75,19 +75,19 @@ Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.cs
|
||||
|
||||
``` sql
|
||||
CREATE TABLE big_table (name String, value UInt32)
|
||||
ENGINE = S3('https://storage.yandexcloud.net/my-bucket/my_folder/file-{000..999}.csv', 'CSV');
|
||||
ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/my_folder/file-{000..999}.csv', 'CSV');
|
||||
```
|
||||
|
||||
**Example with wildcards 2**
|
||||
|
||||
Suppose we have several files in CSV format with the following URIs on S3:
|
||||
|
||||
- 'https://storage.yandexcloud.net/my-bucket/some_folder/some_file_1.csv'
|
||||
- 'https://storage.yandexcloud.net/my-bucket/some_folder/some_file_2.csv'
|
||||
- 'https://storage.yandexcloud.net/my-bucket/some_folder/some_file_3.csv'
|
||||
- 'https://storage.yandexcloud.net/my-bucket/another_folder/some_file_1.csv'
|
||||
- 'https://storage.yandexcloud.net/my-bucket/another_folder/some_file_2.csv'
|
||||
- 'https://storage.yandexcloud.net/my-bucket/another_folder/some_file_3.csv'
|
||||
- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_1.csv'
|
||||
- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_2.csv'
|
||||
- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_3.csv'
|
||||
- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_1.csv'
|
||||
- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_2.csv'
|
||||
- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_3.csv'
|
||||
|
||||
|
||||
There are several ways to make a table consisting of all six files:
|
||||
@ -96,21 +96,21 @@ There are several ways to make a table consisting of all six files:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE table_with_range (name String, value UInt32)
|
||||
ENGINE = S3('https://storage.yandexcloud.net/my-bucket/{some,another}_folder/some_file_{1..3}', 'CSV');
|
||||
ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/{some,another}_folder/some_file_{1..3}', 'CSV');
|
||||
```
|
||||
|
||||
2. Take all files with `some_file_` prefix (there should be no extra files with such prefix in both folders):
|
||||
|
||||
``` sql
|
||||
CREATE TABLE table_with_question_mark (name String, value UInt32)
|
||||
ENGINE = S3('https://storage.yandexcloud.net/my-bucket/{some,another}_folder/some_file_?', 'CSV');
|
||||
ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/{some,another}_folder/some_file_?', 'CSV');
|
||||
```
|
||||
|
||||
3. Take all the files in both folders (all files should satisfy format and schema described in query):
|
||||
|
||||
``` sql
|
||||
CREATE TABLE table_with_asterisk (name String, value UInt32)
|
||||
ENGINE = S3('https://storage.yandexcloud.net/my-bucket/{some,another}_folder/*', 'CSV');
|
||||
ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/{some,another}_folder/*', 'CSV');
|
||||
```
|
||||
|
||||
## S3-related Settings {#settings}
|
||||
@ -142,7 +142,7 @@ The following settings can be specified in configuration file for given endpoint
|
||||
``` xml
|
||||
<s3>
|
||||
<endpoint-name>
|
||||
<endpoint>https://storage.yandexcloud.net/my-test-bucket-768/</endpoint>
|
||||
<endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/</endpoint>
|
||||
<!-- <access_key_id>ACCESS_KEY_ID</access_key_id> -->
|
||||
<!-- <secret_access_key>SECRET_ACCESS_KEY</secret_access_key> -->
|
||||
<!-- <region>us-west-1</region> -->
|
||||
|
@ -55,27 +55,28 @@ WHERE table = 'visits'
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─partition─┬─name───────────┬─active─┐
|
||||
┌─partition─┬─name──────────────┬─active─┐
|
||||
│ 201901 │ 201901_1_3_1 │ 0 │
|
||||
│ 201901 │ 201901_1_9_2 │ 1 │
|
||||
│ 201901 │ 201901_1_9_2_11 │ 1 │
|
||||
│ 201901 │ 201901_8_8_0 │ 0 │
|
||||
│ 201901 │ 201901_9_9_0 │ 0 │
|
||||
│ 201902 │ 201902_4_6_1 │ 1 │
|
||||
│ 201902 │ 201902_10_10_0 │ 1 │
|
||||
│ 201902 │ 201902_11_11_0 │ 1 │
|
||||
└───────────┴────────────────┴────────┘
|
||||
│ 201902 │ 201902_4_6_1_11 │ 1 │
|
||||
│ 201902 │ 201902_10_10_0_11 │ 1 │
|
||||
│ 201902 │ 201902_11_11_0_11 │ 1 │
|
||||
└───────────┴───────────────────┴────────┘
|
||||
```
|
||||
|
||||
The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md) queries.
|
||||
|
||||
The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query.
|
||||
|
||||
Let’s break down the name of the first part: `201901_1_3_1`:
|
||||
Let’s break down the name of the part: `201901_1_9_2_11`:
|
||||
|
||||
- `201901` is the partition name.
|
||||
- `1` is the minimum number of the data block.
|
||||
- `3` is the maximum number of the data block.
|
||||
- `1` is the chunk level (the depth of the merge tree it is formed from).
|
||||
- `9` is the maximum number of the data block.
|
||||
- `2` is the chunk level (the depth of the merge tree it is formed from).
|
||||
- `11` is the mutation version (if a part mutated)
|
||||
|
||||
!!! info "Info"
|
||||
The parts of old-type tables have the name: `20190117_20190123_2_2_0` (minimum date - maximum date - minimum block number - maximum block number - level).
|
||||
@ -89,16 +90,16 @@ OPTIMIZE TABLE visits PARTITION 201902;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─partition─┬─name───────────┬─active─┐
|
||||
┌─partition─┬─name─────────────┬─active─┐
|
||||
│ 201901 │ 201901_1_3_1 │ 0 │
|
||||
│ 201901 │ 201901_1_9_2 │ 1 │
|
||||
│ 201901 │ 201901_1_9_2_11 │ 1 │
|
||||
│ 201901 │ 201901_8_8_0 │ 0 │
|
||||
│ 201901 │ 201901_9_9_0 │ 0 │
|
||||
│ 201902 │ 201902_4_6_1 │ 0 │
|
||||
│ 201902 │ 201902_4_11_2 │ 1 │
|
||||
│ 201902 │ 201902_4_11_2_11 │ 1 │
|
||||
│ 201902 │ 201902_10_10_0 │ 0 │
|
||||
│ 201902 │ 201902_11_11_0 │ 0 │
|
||||
└───────────┴────────────────┴────────┘
|
||||
└───────────┴──────────────────┴────────┘
|
||||
```
|
||||
|
||||
Inactive parts will be deleted approximately 10 minutes after merging.
|
||||
@ -109,12 +110,12 @@ Another way to view a set of parts and partitions is to go into the directory of
|
||||
/var/lib/clickhouse/data/default/visits$ ls -l
|
||||
total 40
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 201901_1_3_1
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201901_1_9_2
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201901_1_9_2_11
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 15:52 201901_8_8_0
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 15:52 201901_9_9_0
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201902_10_10_0
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201902_11_11_0
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:19 201902_4_11_2
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:19 201902_4_11_2_11
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 12:09 201902_4_6_1
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 detached
|
||||
```
|
||||
|
@ -802,7 +802,7 @@ Configuration markup:
|
||||
<disks>
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://storage.yandexcloud.net/my-bucket/root-path/</endpoint>
|
||||
<endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/</endpoint>
|
||||
<access_key_id>your_access_key_id</access_key_id>
|
||||
<secret_access_key>your_secret_access_key</secret_access_key>
|
||||
<region></region>
|
||||
@ -856,7 +856,7 @@ S3 disk can be configured as `main` or `cold` storage:
|
||||
<disks>
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>https://storage.yandexcloud.net/my-bucket/root-path/</endpoint>
|
||||
<endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/</endpoint>
|
||||
<access_key_id>your_access_key_id</access_key_id>
|
||||
<secret_access_key>your_secret_access_key</secret_access_key>
|
||||
</s3>
|
||||
@ -887,6 +887,57 @@ S3 disk can be configured as `main` or `cold` storage:
|
||||
|
||||
In the case of the `cold` option, data can be moved to S3 when the local disk's free space becomes smaller than `move_factor * disk_size`, or by a TTL move rule.
|
||||
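A hedged sketch of such a tiered policy (the policy name is illustrative and the `s3` disk is assumed to be declared as shown above):

``` xml
<policies>
    <tiered>
        <volumes>
            <main>
                <disk>default</disk>
            </main>
            <cold>
                <disk>s3</disk>
            </cold>
        </volumes>
        <move_factor>0.2</move_factor>
    </tiered>
</policies>
```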
|
||||
## Using Azure Blob Storage for Data Storage {#table_engine-mergetree-azure-blob-storage}
|
||||
|
||||
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
|
||||
|
||||
As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
|
||||
|
||||
Configuration markup:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
...
|
||||
<disks>
|
||||
<blob_storage_disk>
|
||||
<type>azure_blob_storage</type>
|
||||
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
|
||||
<container_name>container</container_name>
|
||||
<account_name>account</account_name>
|
||||
<account_key>pass123</account_key>
|
||||
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
|
||||
<cache_enabled>true</cache_enabled>
|
||||
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</blob_storage_disk>
|
||||
</disks>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Connection parameters:
|
||||
* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
|
||||
* `container_name` - Target container name, defaults to `default-container`.
|
||||
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.
|
||||
|
||||
Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
|
||||
* `connection_string` - For authentication using a connection string.
|
||||
* `account_name` and `account_key` - For authentication using Shared Key.
|
||||
|
||||
Limit parameters (mainly for internal usage):
|
||||
* `max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
|
||||
* `min_bytes_for_seek` - Limits the size of a seekable region.
|
||||
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
|
||||
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
|
||||
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
|
||||
|
||||
Other parameters:
|
||||
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
* `cache_enabled` - Allows to cache mark and index files on local FS. Default value is `true`.
|
||||
* `cache_path` - Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
|
||||
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
|
||||
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_part` — Name of a part.
|
||||
|
@ -97,7 +97,7 @@ ZooKeeper is not used in `SELECT` queries because replication does not affect th
|
||||
|
||||
For each `INSERT` query, approximately ten entries are added to ZooKeeper through several transactions. (To be more precise, this is for each inserted block of data; an INSERT query contains one block or one block per `max_insert_block_size = 1048576` rows.) This leads to slightly longer latencies for `INSERT` compared to non-replicated tables. But if you follow the recommendations to insert data in batches of no more than one `INSERT` per second, it does not create any problems. The entire ClickHouse cluster used for coordinating one ZooKeeper cluster has a total of several hundred `INSERTs` per second. The throughput on data inserts (the number of rows per second) is just as high as for non-replicated data.
|
||||
|
||||
For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasn’t proven necessary on the Yandex.Metrica cluster (approximately 300 servers).
|
||||
For very large clusters, you can use different ZooKeeper clusters for different shards. However, from our experience this has not proven necessary based on production clusters with approximately 300 servers.
|
||||
|
||||
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.
|
||||
|
||||
@ -111,7 +111,7 @@ Data blocks are deduplicated. For multiple writes of the same data block (data b
|
||||
|
||||
During replication, only the source data to insert is transferred over the network. Further data transformation (merging) is coordinated and performed on all the replicas in the same way. This minimizes network usage, which means that replication works well when replicas reside in different datacenters. (Note that duplicating data in different datacenters is the main goal of replication.)
|
||||
|
||||
You can have any number of replicas of the same data. Yandex.Metrica uses double replication in production. Each server uses RAID-5 or RAID-6, and RAID-10 in some cases. This is a relatively reliable and convenient solution.
|
||||
You can have any number of replicas of the same data. Based on our experiences, a relatively reliable and convenient solution could use double replication in production, with each server using RAID-5 or RAID-6 (and RAID-10 in some cases).
|
||||
|
||||
The system monitors data synchronicity on replicas and is able to recover after a failure. Failover is automatic (for small differences in data) or semi-automatic (when data differs too much, which may indicate a configuration error).
|
||||
|
||||
@ -163,7 +163,7 @@ Example:
|
||||
<macros>
|
||||
<layer>05</layer>
|
||||
<shard>02</shard>
|
||||
<replica>example05-02-1.yandex.ru</replica>
|
||||
<replica>example05-02-1</replica>
|
||||
</macros>
|
||||
```
|
||||
|
||||
@ -172,7 +172,7 @@ In this case, the path consists of the following parts:
|
||||
|
||||
`/clickhouse/tables/` is the common prefix. We recommend using exactly this one.
|
||||
|
||||
`{layer}-{shard}` is the shard identifier. In this example it consists of two parts, since the Yandex.Metrica cluster uses bi-level sharding. For most tasks, you can leave just the {shard} substitution, which will be expanded to the shard identifier.
|
||||
`{layer}-{shard}` is the shard identifier. In this example it consists of two parts, since the example cluster uses bi-level sharding. For most tasks, you can leave just the {shard} substitution, which will be expanded to the shard identifier.
|
||||
|
||||
`table_name` is the name of the node for the table in ZooKeeper. It is a good idea to make it the same as the table name. It is defined explicitly, because in contrast to the table name, it does not change after a RENAME query.
|
||||
*HINT*: you could add a database name in front of `table_name` as well. E.g. `db_name.table_name`
|
||||
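Putting the macros together, a sketch of a replicated table definition (the columns and partitioning key are illustrative):

``` sql
CREATE TABLE table_name
(
    EventDate Date,
    CounterID UInt32
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}')
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate);
```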
|
@ -197,7 +197,7 @@ A simple remainder from the division is a limited solution for sharding and isn
|
||||
You should be concerned about the sharding scheme in the following cases:
|
||||
|
||||
- Queries are used that require joining data (`IN` or `JOIN`) by a specific key. If data is sharded by this key, you can use local `IN` or `JOIN` instead of `GLOBAL IN` or `GLOBAL JOIN`, which is much more efficient.
|
||||
- A large number of servers is used (hundreds or more) with a large number of small queries, for example, queries for data of individual clients (e.g. websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we’ve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. `Distributed` tables are created for each layer, and a single shared distributed table is created for global queries.
|
||||
- A large number of servers is used (hundreds or more) with a large number of small queries, for example, queries for data of individual clients (e.g. websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. `Distributed` tables are created for each layer, and a single shared distributed table is created for global queries.
|
||||
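As an illustration of the first point above, a hedged sketch where `hits_distributed` and `users_distributed` are `Distributed` tables, `users_local` is the underlying local table, and the columns are assumptions:

``` sql
-- Data sharded by UserID: each shard can evaluate the subquery against its local data.
SELECT count()
FROM hits_distributed
WHERE UserID IN (SELECT UserID FROM users_local WHERE Age > 30);

-- Data not sharded by this key: broadcast the subquery result to every shard instead.
SELECT count()
FROM hits_distributed
WHERE UserID GLOBAL IN (SELECT UserID FROM users_distributed WHERE Age > 30);
```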
|
||||
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The periodicity for sending data is managed by the [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
|
||||
|
||||
|
@ -22,4 +22,4 @@ Here is the illustration of the difference between traditional row-oriented syst
|
||||
**Columnar**
|
||||
![Columnar](https://clickhouse.com/docs/en/images/column-oriented.gif#)
|
||||
|
||||
A columnar database is a preferred choice for analytical applications because it allows to have many columns in a table just in case, but do not pay the cost for unused columns on read query execution time. Column-oriented databases are designed for big data processing because and data warehousing, they often natively scale using distributed clusters of low-cost hardware to increase throughput. ClickHouse does it with combination of [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables.
|
||||
A columnar database is a preferred choice for analytical applications because it allows having many columns in a table just in case, without paying the cost for unused columns at read query execution time. Column-oriented databases are designed for big data processing and data warehousing, because they often natively scale using distributed clusters of low-cost hardware to increase throughput. ClickHouse does this with a combination of [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables.
|
||||
|
@ -6,7 +6,7 @@ toc_priority: 110
|
||||
|
||||
# Why Not Use Something Like MapReduce? {#why-not-use-something-like-mapreduce}
|
||||
|
||||
We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Yandex uses its in-house solution, YT.
|
||||
We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Large IT companies often have proprietary in-house solutions.
|
||||
|
||||
These systems aren’t appropriate for online queries due to their high latency. In other words, they can’t be used as the back-end for a web interface. These types of systems aren’t useful for real-time data updates. Distributed sorting isn’t the best way to perform reduce operations if the result of the operation and all the intermediate results (if there are any) are located in the RAM of a single server, which is usually the case for online queries. In such a case, a hash table is an optimal way to perform reduce operations. A common approach to optimizing map-reduce tasks is pre-aggregation (partial reduce) using a hash table in RAM. The user performs this optimization manually. Distributed sorting is one of the main causes of reduced performance when running simple map-reduce tasks.
|
||||
|
||||
|
@ -9,7 +9,7 @@ toc_priority: 11
|
||||
|
||||
This question usually arises when people see official ClickHouse t-shirts. They have large words **“ClickHouse не тормозит”** on the front.
|
||||
|
||||
Before ClickHouse became open-source, it has been developed as an in-house storage system by the largest Russian IT company, [Yandex](https://yandex.com/company/). That’s why it initially got its slogan in Russian, which is “не тормозит” (pronounced as “ne tormozit”). After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is.
|
||||
Before ClickHouse became open-source, it had been developed as an in-house storage system by the largest Russian IT company, Yandex. That’s why it initially got its slogan in Russian, which is “не тормозит” (pronounced as “ne tormozit”). After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is.
|
||||
|
||||
One of the following batches of those t-shirts was supposed to be given away on events outside of Russia and we tried to make the English version of the slogan. Unfortunately, the Russian language is kind of elegant in terms of expressing stuff and there was a restriction of limited space on a t-shirt, so we failed to come up with good enough translation (most options appeared to be either long or inaccurate) and decided to keep the slogan in Russian even on t-shirts produced for international events. It appeared to be a great decision because people all over the world get positively surprised and curious when they see it.
|
||||
|
||||
|
@ -11,7 +11,7 @@ This section describes how to obtain example datasets and import them into Click
|
||||
The list of documented datasets:
|
||||
|
||||
- [GitHub Events](../../getting-started/example-datasets/github-events.md)
|
||||
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
|
||||
- [Anonymized Web Analytics Dataset](../../getting-started/example-datasets/metrica.md)
|
||||
- [Recipes](../../getting-started/example-datasets/recipes.md)
|
||||
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
|
||||
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
|
||||
|
@ -1,11 +1,11 @@
|
||||
---
|
||||
toc_priority: 15
|
||||
toc_title: Yandex.Metrica Data
|
||||
toc_title: Web Analytics Data
|
||||
---
|
||||
|
||||
# Anonymized Yandex.Metrica Data {#anonymized-yandex-metrica-data}
|
||||
# Anonymized Web Analytics Data {#anonymized-web-analytics-data}
|
||||
|
||||
Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. You can read more about Yandex.Metrica in [ClickHouse history](../../introduction/history.md) section.
|
||||
The dataset consists of two tables containing anonymized web analytics data with hits (`hits_v1`) and visits (`visits_v1`).
|
||||
|
||||
Either of the two tables can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://datasets.clickhouse.com/hits/tsv/hits_100m_obfuscated_v1.tsv.xz and as prepared partitions at https://datasets.clickhouse.com/hits/partitions/hits_100m_obfuscated_v1.tar.xz.
|
||||
|
||||
@ -73,6 +73,6 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
|
||||
|
||||
## Example Queries {#example-queries}
|
||||
|
||||
[ClickHouse tutorial](../../getting-started/tutorial.md) is based on Yandex.Metrica dataset and the recommended way to get started with this dataset is to just go through tutorial.
|
||||
[The ClickHouse tutorial](../../getting-started/tutorial.md) is based on this web analytics dataset, and the recommended way to get started with this dataset is to go through the tutorial.
|
||||
|
||||
Additional examples of queries to these tables can be found among [stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) of ClickHouse (they are named `test.hits` and `test.visits` there).
|
||||
|
@ -375,7 +375,7 @@ Q3: 0.051 sec.
|
||||
Q4: 0.072 sec.
|
||||
|
||||
In this case, the query processing time is determined above all by network latency.
|
||||
We ran queries using a client located in a Yandex datacenter in Finland on a cluster in Russia, which added about 20 ms of latency.
|
||||
We ran queries using a client located in a different datacenter than where the cluster was located, which added about 20 ms of latency.
|
||||
|
||||
## Summary {#summary}
|
||||
|
||||
|
@ -27,9 +27,17 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun
|
||||
{% include 'install/deb.sh' %}
|
||||
```
|
||||
|
||||
<details markdown="1">
|
||||
|
||||
<summary>Deprecated Method for installing deb-packages</summary>
|
||||
``` bash
|
||||
{% include 'install/deb_repo.sh' %}
|
||||
```
|
||||
</details>
|
||||
|
||||
You can replace `stable` with `lts` or `testing` to use different [release trains](../faq/operations/production.md) based on your needs.
|
||||
|
||||
You can also download and install packages manually from [here](https://repo.clickhouse.com/deb/stable/main/).
|
||||
You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable).
|
||||
|
||||
#### Packages {#packages}
|
||||
|
||||
@ -49,11 +57,17 @@ It is recommended to use official pre-compiled `rpm` packages for CentOS, RedHat
|
||||
First, you need to add the official repository:
|
||||
|
||||
``` bash
|
||||
sudo yum install yum-utils
|
||||
sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
|
||||
sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64
|
||||
{% include 'install/rpm.sh' %}
|
||||
```
|
||||
|
||||
<details markdown="1">
|
||||
|
||||
<summary>Deprecated Method for installing rpm-packages</summary>
|
||||
``` bash
|
||||
{% include 'install/rpm_repo.sh' %}
|
||||
```
|
||||
</details>
|
||||
|
||||
If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). `prestable` is sometimes also available.
|
||||
|
||||
Then run these commands to install packages:
|
||||
@ -62,36 +76,27 @@ Then run these commands to install packages:
|
||||
sudo yum install clickhouse-server clickhouse-client
|
||||
```
|
||||
|
||||
You can also download and install packages manually from [here](https://repo.clickhouse.com/rpm/stable/x86_64).
|
||||
You can also download and install packages manually from [here](https://packages.clickhouse.com/rpm/stable).
|
||||
|
||||
### From Tgz Archives {#from-tgz-archives}
|
||||
|
||||
It is recommended to use official pre-compiled `tgz` archives for all Linux distributions where installation of `deb` or `rpm` packages is not possible.
|
||||
|
||||
The required version can be downloaded with `curl` or `wget` from repository https://repo.clickhouse.com/tgz/.
|
||||
The required version can be downloaded with `curl` or `wget` from repository https://packages.clickhouse.com/tgz/.
|
||||
After that, the downloaded archives should be unpacked and installed with installation scripts. Example for the latest stable version:
|
||||
|
||||
``` bash
|
||||
export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep stable | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
|
||||
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz
|
||||
|
||||
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
|
||||
|
||||
tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
|
||||
|
||||
tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
|
||||
sudo /etc/init.d/clickhouse-server start
|
||||
|
||||
tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
|
||||
{% include 'install/tgz.sh' %}
|
||||
```
|
||||
|
||||
<details markdown="1">
|
||||
|
||||
<summary>Deprecated Method for installing tgz archives</summary>
|
||||
``` bash
|
||||
{% include 'install/tgz_repo.sh' %}
|
||||
```
|
||||
</details>
|
||||
|
||||
For production environments, it’s recommended to use the latest `stable`-version. You can find its number on GitHub page https://github.com/ClickHouse/ClickHouse/tags with postfix `-stable`.
|
||||
|
||||
### From Docker Image {#from-docker-image}
|
||||
@ -215,6 +220,6 @@ SELECT 1
|
||||
|
||||
**Congratulations, the system works!**
|
||||
|
||||
To continue experimenting, you can download one of the test data sets or go through [tutorial](https://clickhouse.com/tutorial.html).
|
||||
To continue experimenting, you can download one of the test data sets or go through [tutorial](./tutorial.md).
|
||||
|
||||
[Original article](https://clickhouse.com/docs/en/getting_started/install/) <!--hide-->
|
||||
|
@ -5,29 +5,19 @@ toc_title: Playground
|
||||
|
||||
# ClickHouse Playground {#clickhouse-playground}
|
||||
|
||||
[ClickHouse Playground](https://play.clickhouse.com) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
|
||||
Several example datasets are available in Playground as well as sample queries that show ClickHouse features. There’s also a selection of ClickHouse LTS releases to experiment with.
|
||||
|
||||
ClickHouse Playground gives the experience of m2.small [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) instance (4 vCPU, 32 GB RAM) hosted in [Yandex.Cloud](https://cloud.yandex.com/). More information about [cloud providers](../commercial/cloud.md).
|
||||
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
|
||||
Several example datasets are available in Playground.
|
||||
|
||||
You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
|
||||
|
||||
## Credentials {#credentials}
|
||||
|
||||
| Parameter | Value |
|
||||
|:--------------------|:----------------------------------------|
|
||||
| HTTPS endpoint | `https://play-api.clickhouse.com:8443` |
|
||||
| Native TCP endpoint | `play-api.clickhouse.com:9440` |
|
||||
| User | `playground` |
|
||||
| Password | `clickhouse` |
|
||||
|
||||
There are additional endpoints with specific ClickHouse releases to experiment with their differences (ports and user/password are the same as above):
|
||||
|
||||
- 20.3 LTS: `play-api-v20-3.clickhouse.com`
|
||||
- 19.14 LTS: `play-api-v19-14.clickhouse.com`
|
||||
|
||||
!!! note "Note"
|
||||
All these endpoints require a secure TLS connection.
|
||||
|:--------------------|:-----------------------------------|
|
||||
| HTTPS endpoint | `https://play.clickhouse.com:443/` |
|
||||
| Native TCP endpoint | `play.clickhouse.com:9440` |
|
||||
| User | `explorer` or `play` |
|
||||
| Password | (empty) |
|
||||
|
||||
## Limitations {#limitations}
|
||||
|
||||
@ -36,31 +26,18 @@ The queries are executed as a read-only user. It implies some limitations:
|
||||
- DDL queries are not allowed
|
||||
- INSERT queries are not allowed
|
||||
|
||||
The following settings are also enforced:
|
||||
|
||||
- [max_result_bytes=10485760](../operations/settings/query-complexity/#max-result-bytes)
|
||||
- [max_result_rows=2000](../operations/settings/query-complexity/#setting-max_result_rows)
|
||||
- [result_overflow_mode=break](../operations/settings/query-complexity/#result-overflow-mode)
|
||||
- [max_execution_time=60000](../operations/settings/query-complexity/#max-execution-time)
|
||||
The service also has quotas on its usage.
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
HTTPS endpoint example with `curl`:
|
||||
|
||||
``` bash
|
||||
curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets"
|
||||
curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'"
|
||||
```
|
||||
|
||||
TCP endpoint example with [CLI](../interfaces/cli.md):
|
||||
|
||||
``` bash
|
||||
clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'"
|
||||
clickhouse client --secure --host play.clickhouse.com --user explorer
|
||||
```
|
||||
|
||||
## Implementation Details {#implementation-details}
|
||||
|
||||
ClickHouse Playground web interface makes requests via ClickHouse [HTTP API](../interfaces/http.md).
|
||||
The Playground backend is just a ClickHouse cluster without any additional server-side application. As mentioned above, ClickHouse HTTPS and TCP/TLS endpoints are also publicly available as a part of the Playground; both are proxied through [Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) to add an extra layer of protection and improved global connectivity.
|
||||
|
||||
!!! warning "Warning"
|
||||
Exposing the ClickHouse server to the public internet in any other situation is **strongly not recommended**. Make sure it listens only on a private network and is covered by a properly configured firewall.
|
||||
|