Merge branch 'master' into avoid_printing_settings

This commit is contained in:
mergify[bot] 2022-07-28 09:08:54 +00:00 committed by GitHub
commit e3ef3a2b41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
374 changed files with 7475 additions and 2602 deletions

View File

@ -394,7 +394,7 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=ClickHouse build check (actions)
CHECK_NAME=ClickHouse build check
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
@ -437,7 +437,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -477,7 +477,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, actions)
CHECK_NAME=Stateful tests (debug)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -521,7 +521,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (thread, actions)
CHECK_NAME=Stress test (thread)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -560,7 +560,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
EOF
- name: Download json reports

View File

@ -971,7 +971,7 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=ClickHouse build check (actions)
CHECK_NAME=ClickHouse build check
REPORTS_PATH=${{runner.temp}}/reports_dir
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
@ -1020,7 +1020,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse special build check (actions)
CHECK_NAME=ClickHouse special build check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
EOF
- name: Download json reports
@ -1061,7 +1061,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, actions)
CHECK_NAME=Stateless tests (release)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1098,7 +1098,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release_database_ordinary
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, DatabaseOrdinary, actions)
CHECK_NAME=Stateless tests (release, DatabaseOrdinary)
REPO_COPY=${{runner.temp}}/stateless_release_database_ordinary/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1135,7 +1135,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_s3_storage
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, s3 storage, actions)
CHECK_NAME=Stateless tests (release, s3 storage)
REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1172,7 +1172,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (aarch64, actions)
CHECK_NAME=Stateless tests (aarch64)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1209,7 +1209,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1248,7 +1248,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1287,7 +1287,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1326,7 +1326,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1365,7 +1365,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1404,7 +1404,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (ubsan, actions)
CHECK_NAME=Stateless tests (ubsan)
REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1441,7 +1441,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1480,7 +1480,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1519,7 +1519,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1558,7 +1558,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1597,7 +1597,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1636,7 +1636,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1678,7 +1678,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, actions)
CHECK_NAME=Stateful tests (release)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1715,7 +1715,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release_database_ordinary
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, DatabaseOrdinary, actions)
CHECK_NAME=Stateful tests (release, DatabaseOrdinary)
REPO_COPY=${{runner.temp}}/stateful_release_database_ordinary/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1752,7 +1752,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (aarch64, actions)
CHECK_NAME=Stateful tests (aarch64)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1789,7 +1789,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (address, actions)
CHECK_NAME=Stateful tests (address)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1826,7 +1826,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (thread, actions)
CHECK_NAME=Stateful tests (thread)
REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1863,7 +1863,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (memory, actions)
CHECK_NAME=Stateful tests (memory)
REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1900,7 +1900,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (ubsan, actions)
CHECK_NAME=Stateful tests (ubsan)
REPO_COPY=${{runner.temp}}/stateful_ubsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1937,7 +1937,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, actions)
CHECK_NAME=Stateful tests (debug)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1977,7 +1977,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (address, actions)
CHECK_NAME=Stress test (address)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -2017,7 +2017,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (thread, actions)
CHECK_NAME=Stress test (thread)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -2053,7 +2053,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (memory, actions)
CHECK_NAME=Stress test (memory)
REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse
EOF
- name: Download json reports
@ -2089,7 +2089,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_undefined
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (undefined, actions)
CHECK_NAME=Stress test (undefined)
REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse
EOF
- name: Download json reports
@ -2125,7 +2125,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (debug, actions)
CHECK_NAME=Stress test (debug)
REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse
EOF
- name: Download json reports
@ -2164,7 +2164,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
@ -2202,7 +2202,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
@ -2240,7 +2240,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
@ -2278,7 +2278,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=4
@ -2316,7 +2316,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=4
@ -2354,7 +2354,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=4
@ -2392,7 +2392,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=3
RUN_BY_HASH_TOTAL=4
@ -2430,7 +2430,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=2
@ -2468,7 +2468,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=2
@ -2509,7 +2509,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (ASan, actions)
CHECK_NAME=AST fuzzer (ASan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse
EOF
- name: Download json reports
@ -2545,7 +2545,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (TSan, actions)
CHECK_NAME=AST fuzzer (TSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse
EOF
- name: Download json reports
@ -2581,7 +2581,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (UBSan, actions)
CHECK_NAME=AST fuzzer (UBSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse
EOF
- name: Download json reports
@ -2617,7 +2617,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (MSan, actions)
CHECK_NAME=AST fuzzer (MSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse
EOF
- name: Download json reports
@ -2653,7 +2653,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (debug, actions)
CHECK_NAME=AST fuzzer (debug)
REPO_COPY=${{runner.temp}}/ast_fuzzer_debug/ClickHouse
EOF
- name: Download json reports
@ -2692,7 +2692,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (asan, actions)
CHECK_NAME=Unit tests (asan)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@ -2728,7 +2728,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-clang, actions)
CHECK_NAME=Unit tests (release-clang)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@ -2764,7 +2764,7 @@ jobs:
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/unit_tests_asan
# REPORTS_PATH=${{runner.temp}}/reports_dir
# CHECK_NAME=Unit tests (release-gcc, actions)
# CHECK_NAME=Unit tests (release-gcc)
# REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
# EOF
# - name: Download json reports
@ -2800,7 +2800,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (tsan, actions)
CHECK_NAME=Unit tests (tsan)
REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse
EOF
- name: Download json reports
@ -2836,7 +2836,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (msan, actions)
CHECK_NAME=Unit tests (msan)
REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse
EOF
- name: Download json reports
@ -2872,7 +2872,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (ubsan, actions)
CHECK_NAME=Unit tests (ubsan)
REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse
EOF
- name: Download json reports

View File

@ -254,7 +254,7 @@ jobs:
#################################### ORDINARY BUILDS ####################################
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -301,7 +301,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
BuilderBinRelease:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -345,53 +345,8 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
# BuilderBinGCC:
# needs: [DockerHubPush, FastTest]
# runs-on: [self-hosted, builder]
# steps:
# - name: Set envs
# run: |
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/build_check
# IMAGES_PATH=${{runner.temp}}/images_path
# REPO_COPY=${{runner.temp}}/build_check/ClickHouse
# CACHES_PATH=${{runner.temp}}/../ccaches
# BUILD_NAME=binary_gcc
# EOF
# - name: Download changed images
# uses: actions/download-artifact@v2
# with:
# name: changed_images
# path: ${{ runner.temp }}/images_path
# - name: Clear repository
# run: |
# sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
# - name: Check out repository code
# uses: actions/checkout@v2
# - name: Build
# run: |
# git -C "$GITHUB_WORKSPACE" submodule sync --recursive
# git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
# sudo rm -fr "$TEMP_PATH"
# mkdir -p "$TEMP_PATH"
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
# cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
# - name: Upload build URLs to artifacts
# if: ${{ success() || failure() }}
# uses: actions/upload-artifact@v2
# with:
# name: ${{ env.BUILD_URLS }}
# path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
# - name: Cleanup
# if: always()
# run: |
# # shellcheck disable=SC2046
# docker kill $(docker ps -q) ||:
# # shellcheck disable=SC2046
# docker rm -f $(docker ps -a -q) ||:
# sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebAarch64:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -438,7 +393,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebAsan:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -483,7 +438,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebUBsan:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -528,7 +483,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebTsan:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -573,7 +528,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebMsan:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -618,7 +573,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebDebug:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -666,7 +621,7 @@ jobs:
##################################### SPECIAL BUILDS #####################################
##########################################################################################
BuilderDebSplitted:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -711,7 +666,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinClangTidy:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -756,7 +711,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwin:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -801,7 +756,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinAarch64:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -846,7 +801,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinFreeBSD:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -891,7 +846,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwinAarch64:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -936,7 +891,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinPPC64:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -1029,7 +984,7 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=ClickHouse build check (actions)
CHECK_NAME=ClickHouse build check
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
@ -1078,7 +1033,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse special build check (actions)
CHECK_NAME=ClickHouse special build check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
EOF
- name: Download json reports
@ -1119,7 +1074,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, actions)
CHECK_NAME=Stateless tests (release)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1156,7 +1111,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_database_replicated
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, DatabaseReplicated, actions)
CHECK_NAME=Stateless tests (release, DatabaseReplicated)
REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1195,7 +1150,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_database_replicated
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, DatabaseReplicated, actions)
CHECK_NAME=Stateless tests (release, DatabaseReplicated)
REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1234,7 +1189,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_wide_parts
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, wide parts enabled, actions)
CHECK_NAME=Stateless tests (release, wide parts enabled)
REPO_COPY=${{runner.temp}}/stateless_wide_parts/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1271,7 +1226,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_s3_storage
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, s3 storage, actions)
CHECK_NAME=Stateless tests (release, s3 storage)
REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1308,7 +1263,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (aarch64, actions)
CHECK_NAME=Stateless tests (aarch64)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1345,7 +1300,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1384,7 +1339,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1423,7 +1378,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1462,7 +1417,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1501,7 +1456,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1540,7 +1495,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (ubsan, actions)
CHECK_NAME=Stateless tests (ubsan)
REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1577,7 +1532,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1616,7 +1571,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1655,7 +1610,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1694,7 +1649,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1733,7 +1688,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1772,7 +1727,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1811,7 +1766,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_flaky_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests flaky check (address, actions)
CHECK_NAME=Stateless tests flaky check (address)
REPO_COPY=${{runner.temp}}/stateless_flaky_asan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1847,7 +1802,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/tests_bugfix_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Tests bugfix validate check (actions)
CHECK_NAME=tests bugfix validate check
KILL_TIMEOUT=3600
REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse
EOF
@ -1869,12 +1824,12 @@ jobs:
TEMP_PATH="${TEMP_PATH}/integration" \
REPORTS_PATH="${REPORTS_PATH}/integration" \
python3 integration_test_check.py "Integration tests bugfix validate check" \
python3 integration_test_check.py "Integration $CHECK_NAME" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'
TEMP_PATH="${TEMP_PATH}/stateless" \
REPORTS_PATH="${REPORTS_PATH}/stateless" \
python3 functional_test_check.py "Stateless tests bugfix validate check" "$KILL_TIMEOUT" \
python3 functional_test_check.py "Stateless $CHECK_NAME" "$KILL_TIMEOUT" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'
python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/post_commit_status.tsv" "${TEMP_PATH}/integration/post_commit_status.tsv"
@ -1898,7 +1853,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, actions)
CHECK_NAME=Stateful tests (release)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1935,7 +1890,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (aarch64, actions)
CHECK_NAME=Stateful tests (aarch64)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1972,7 +1927,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (address, actions)
CHECK_NAME=Stateful tests (address)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2009,7 +1964,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (thread, actions)
CHECK_NAME=Stateful tests (thread)
REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2046,7 +2001,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (memory, actions)
CHECK_NAME=Stateful tests (memory)
REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2083,7 +2038,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (ubsan, actions)
CHECK_NAME=Stateful tests (ubsan)
REPO_COPY=${{runner.temp}}/stateful_ubsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2120,7 +2075,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, actions)
CHECK_NAME=Stateful tests (debug)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2160,7 +2115,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (address, actions)
CHECK_NAME=Stress test (address)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -2200,7 +2155,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (thread, actions)
CHECK_NAME=Stress test (thread)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -2236,7 +2191,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (memory, actions)
CHECK_NAME=Stress test (memory)
REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse
EOF
- name: Download json reports
@ -2272,7 +2227,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_undefined
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (undefined, actions)
CHECK_NAME=Stress test (undefined)
REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse
EOF
- name: Download json reports
@ -2308,7 +2263,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (debug, actions)
CHECK_NAME=Stress test (debug)
REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse
EOF
- name: Download json reports
@ -2347,7 +2302,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (ASan, actions)
CHECK_NAME=AST fuzzer (ASan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse
EOF
- name: Download json reports
@ -2383,7 +2338,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (TSan, actions)
CHECK_NAME=AST fuzzer (TSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse
EOF
- name: Download json reports
@ -2419,7 +2374,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (UBSan, actions)
CHECK_NAME=AST fuzzer (UBSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse
EOF
- name: Download json reports
@ -2455,7 +2410,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (MSan, actions)
CHECK_NAME=AST fuzzer (MSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse
EOF
- name: Download json reports
@ -2491,7 +2446,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (debug, actions)
CHECK_NAME=AST fuzzer (debug)
REPO_COPY=${{runner.temp}}/ast_fuzzer_debug/ClickHouse
EOF
- name: Download json reports
@ -2530,7 +2485,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
@ -2568,7 +2523,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
@ -2606,7 +2561,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
@ -2644,7 +2599,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=4
@ -2682,7 +2637,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=4
@ -2720,7 +2675,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=4
@ -2758,7 +2713,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=3
RUN_BY_HASH_TOTAL=4
@ -2796,7 +2751,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=2
@ -2834,7 +2789,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=2
@ -2872,7 +2827,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan_flaky_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests flaky check (asan, actions)
CHECK_NAME=Integration tests flaky check (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan_flaky_check/ClickHouse
EOF
- name: Download json reports
@ -2911,7 +2866,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (asan, actions)
CHECK_NAME=Unit tests (asan)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@ -2947,7 +2902,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-clang, actions)
CHECK_NAME=Unit tests (release-clang)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@ -2974,42 +2929,6 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
# UnitTestsReleaseGCC:
# needs: [BuilderBinGCC]
# runs-on: [self-hosted, fuzzer-unit-tester]
# steps:
# - name: Set envs
# run: |
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/unit_tests_asan
# REPORTS_PATH=${{runner.temp}}/reports_dir
# CHECK_NAME=Unit tests (release-gcc, actions)
# REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
# EOF
# - name: Download json reports
# uses: actions/download-artifact@v2
# with:
# path: ${{ env.REPORTS_PATH }}
# - name: Clear repository
# run: |
# sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
# - name: Check out repository code
# uses: actions/checkout@v2
# - name: Unit test
# run: |
# sudo rm -fr "$TEMP_PATH"
# mkdir -p "$TEMP_PATH"
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
# cd "$REPO_COPY/tests/ci"
# python3 unit_tests_check.py "$CHECK_NAME"
# - name: Cleanup
# if: always()
# run: |
# # shellcheck disable=SC2046
# docker kill $(docker ps -q) ||:
# # shellcheck disable=SC2046
# docker rm -f $(docker ps -a -q) ||:
# sudo rm -fr "$TEMP_PATH"
UnitTestsTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, fuzzer-unit-tester]
@ -3019,7 +2938,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (tsan, actions)
CHECK_NAME=Unit tests (tsan)
REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse
EOF
- name: Download json reports
@ -3055,7 +2974,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (msan, actions)
CHECK_NAME=Unit tests (msan)
REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse
EOF
- name: Download json reports
@ -3091,7 +3010,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (ubsan, actions)
CHECK_NAME=Unit tests (ubsan)
REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse
EOF
- name: Download json reports

View File

@ -473,7 +473,7 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=ClickHouse build check (actions)
CHECK_NAME=ClickHouse build check
REPORTS_PATH=${{runner.temp}}/reports_dir
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
@ -517,7 +517,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, actions)
CHECK_NAME=Stateless tests (release)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -554,7 +554,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (aarch64, actions)
CHECK_NAME=Stateless tests (aarch64)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -591,7 +591,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -630,7 +630,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -669,7 +669,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -708,7 +708,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -747,7 +747,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -786,7 +786,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (ubsan, actions)
CHECK_NAME=Stateless tests (ubsan)
REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -823,7 +823,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -862,7 +862,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -901,7 +901,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -940,7 +940,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -979,7 +979,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1018,7 +1018,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1060,7 +1060,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, actions)
CHECK_NAME=Stateful tests (release)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1097,7 +1097,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (aarch64, actions)
CHECK_NAME=Stateful tests (aarch64)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1134,7 +1134,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (address, actions)
CHECK_NAME=Stateful tests (address)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1171,7 +1171,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (thread, actions)
CHECK_NAME=Stateful tests (thread)
REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1208,7 +1208,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (memory, actions)
CHECK_NAME=Stateful tests (memory)
REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1245,7 +1245,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (ubsan, actions)
CHECK_NAME=Stateful tests (ubsan)
REPO_COPY=${{runner.temp}}/stateful_ubsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1282,7 +1282,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, actions)
CHECK_NAME=Stateful tests (debug)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1322,7 +1322,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (address, actions)
CHECK_NAME=Stress test (address)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -1362,7 +1362,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (thread, actions)
CHECK_NAME=Stress test (thread)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -1398,7 +1398,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (memory, actions)
CHECK_NAME=Stress test (memory)
REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse
EOF
- name: Download json reports
@ -1434,7 +1434,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_undefined
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (undefined, actions)
CHECK_NAME=Stress test (undefined)
REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse
EOF
- name: Download json reports
@ -1470,7 +1470,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (debug, actions)
CHECK_NAME=Stress test (debug)
REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse
EOF
- name: Download json reports
@ -1509,7 +1509,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
@ -1547,7 +1547,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
@ -1585,7 +1585,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
@ -1623,7 +1623,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=4
@ -1661,7 +1661,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=4
@ -1699,7 +1699,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=4
@ -1737,7 +1737,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=3
RUN_BY_HASH_TOTAL=4
@ -1775,7 +1775,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=2
@ -1813,7 +1813,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=2

3
.gitmodules vendored
View File

@ -277,9 +277,6 @@
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing.git
[submodule "contrib/base-x"]
path = contrib/base-x
url = https://github.com/ClickHouse/base-x.git
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/ClickHouse/c-ares

View File

@ -1,17 +1,18 @@
### Table of Contents
**[ClickHouse release v22.7, 2022-07-21](#226)**<br>
**[ClickHouse release v22.6, 2022-06-16](#226)**<br>
**[ClickHouse release v22.5, 2022-05-19](#225)**<br>
**[ClickHouse release v22.4, 2022-04-20](#224)**<br>
**[ClickHouse release v22.3-lts, 2022-03-17](#223)**<br>
**[ClickHouse release v22.2, 2022-02-17](#222)**<br>
**[ClickHouse release v22.1, 2022-01-18](#221)**<br>
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**<br>
**[ClickHouse release v22.7, 2022-07-21](#227)**<br/>
**[ClickHouse release v22.6, 2022-06-16](#226)**<br/>
**[ClickHouse release v22.5, 2022-05-19](#225)**<br/>
**[ClickHouse release v22.4, 2022-04-20](#224)**<br/>
**[ClickHouse release v22.3-lts, 2022-03-17](#223)**<br/>
**[ClickHouse release v22.2, 2022-02-17](#222)**<br/>
**[ClickHouse release v22.1, 2022-01-18](#221)**<br/>
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**<br/>
### <a id="227"></a> ClickHouse release 22.7, 2022-07-21
#### Upgrade Notes
* Enable setting `enable_positional_arguments` by default. It allows queries like `SELECT ... ORDER BY 1, 2` where 1, 2 are the references to the select clause. If you need to return the old behavior, disable this setting. [#38204](https://github.com/ClickHouse/ClickHouse/pull/38204) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disable `format_csv_allow_single_quotes` by default. See [#37096](https://github.com/ClickHouse/ClickHouse/issues/37096). ([Kruglov Pavel](https://github.com/Avogar)).
* `Ordinary` database engine and old storage definition syntax for `*MergeTree` tables are deprecated. By default it's not possible to create new databases with `Ordinary` engine. If `system` database has `Ordinary` engine it will be automatically converted to `Atomic` on server startup. There are settings to keep old behavior (`allow_deprecated_database_ordinary` and `allow_deprecated_syntax_for_merge_tree`), but these settings may be removed in future releases. [#38335](https://github.com/ClickHouse/ClickHouse/pull/38335) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Force rewriting comma join to inner by default (set default value `cross_to_inner_join_rewrite = 2`). To restore the old behavior, set `cross_to_inner_join_rewrite = 1`. [#39326](https://github.com/ClickHouse/ClickHouse/pull/39326) ([Vladimir C](https://github.com/vdimir)). If you face any incompatibilities, you can revert this setting.

View File

@ -554,6 +554,16 @@ macro (clickhouse_add_executable target)
endif()
endmacro()
# With cross-compiling, all targets are built for the target platform, which is usually different from the host
# platform. This is problematic if a build artifact X (e.g. a file or an executable) is generated by running
# another executable Y previously produced in the build. This is solved by compiling and running Y for/on
# the host platform. Add such targets to the list with:
# add_native_target(<target> ...)
#
# The list is kept in the global property NATIVE_BUILD_TARGETS, which is declared (empty) here and
# appended to by every add_native_target() call.
set_property (GLOBAL PROPERTY NATIVE_BUILD_TARGETS)
# Appends all passed arguments (target names) to the global NATIVE_BUILD_TARGETS list.
function (add_native_target)
set_property (GLOBAL APPEND PROPERTY NATIVE_BUILD_TARGETS ${ARGV})
endfunction (add_native_target)
set(ConfigIncludePath ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.")
include_directories(${ConfigIncludePath})
@ -568,3 +578,33 @@ add_subdirectory (tests)
add_subdirectory (utils)
include (cmake/sanitize_target_link_libraries.cmake)
# Build native targets if necessary.
#
# The section below only runs when at least one target was registered in the global
# NATIVE_BUILD_TARGETS property and the host system name/processor are not both equal to the
# target ones. It uses execute_process(), so the commands run while CMake
# is configuring, not as part of the regular build.
get_property(NATIVE_BUILD_TARGETS GLOBAL PROPERTY NATIVE_BUILD_TARGETS)
if (NATIVE_BUILD_TARGETS
AND NOT(
CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME
AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR
)
)
message (STATUS "Building native targets...")
# Separate build tree for the host-platform artifacts.
set (NATIVE_BUILD_DIR "${CMAKE_BINARY_DIR}/native")
# Step 1: create the native build directory.
execute_process(
COMMAND ${CMAKE_COMMAND} -E make_directory "${NATIVE_BUILD_DIR}"
COMMAND_ECHO STDOUT)
# Step 2: configure the same source tree inside the native build directory, passing only the
# C and C++ compilers of the current configuration.
execute_process(
COMMAND ${CMAKE_COMMAND}
"-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
"-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
${CMAKE_SOURCE_DIR}
WORKING_DIRECTORY "${NATIVE_BUILD_DIR}"
COMMAND_ECHO STDOUT)
# Step 3: build only the registered native targets.
# NOTE(review): none of the execute_process() calls capture or check a result, so a failure
# in any step does not stop the configuration here.
execute_process(
COMMAND ${CMAKE_COMMAND} --build "${NATIVE_BUILD_DIR}" --target ${NATIVE_BUILD_TARGETS}
COMMAND_ECHO STDOUT)
endif ()

View File

@ -669,18 +669,18 @@ std::string JSON::getName() const
return getString();
}
StringRef JSON::getRawString() const
std::string_view JSON::getRawString() const
{
Pos s = ptr_begin;
if (*s != '"')
throw JSONException(std::string("JSON: expected \", got ") + *s);
while (++s != ptr_end && *s != '"');
if (s != ptr_end)
return StringRef(ptr_begin + 1, s - ptr_begin - 1);
return std::string_view(ptr_begin + 1, s - ptr_begin - 1);
throw JSONException("JSON: incorrect syntax (expected end of string, found end of JSON).");
}
StringRef JSON::getRawName() const
std::string_view JSON::getRawName() const
{
return getRawString();
}

View File

@ -136,8 +136,8 @@ public:
std::string getName() const; /// Получить имя name-value пары.
JSON getValue() const; /// Получить значение name-value пары.
StringRef getRawString() const;
StringRef getRawName() const;
std::string_view getRawString() const;
std::string_view getRawName() const;
/// Получить значение элемента; если элемент - строка, то распарсить значение из строки; если не строка или число - то исключение.
double toDouble() const;

View File

@ -1,68 +1,192 @@
#include <sys/auxv.h>
#include "atomic.h"
#include <unistd.h> // __environ
#include <sys/auxv.h>
#include <fcntl.h> // open
#include <sys/stat.h> // O_RDONLY
#include <unistd.h> // read, close
#include <stdlib.h> // ssize_t
#include <stdio.h> // perror, fprintf
#include <link.h> // ElfW
#include <errno.h>
// We don't have libc struct available here. Compute aux vector manually.
static unsigned long * __auxv = NULL;
static unsigned long __auxv_secure = 0;
/// Number of elements in the array `a` (`a` must be a real array, not a pointer).
/// The expansion and the argument are fully parenthesized so the macro can be used
/// safely inside larger expressions (e.g. `x % ARRAY_SIZE(arr)`).
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
static size_t __find_auxv(unsigned long type)
/// Suppress TSan since it is possible for this code to be called from multiple threads,
/// and initialization is safe to be done multiple times from multiple threads.
#if defined(__clang__)
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
#else
# define NO_SANITIZE_THREAD
#endif
// We don't have libc struct available here.
// Compute aux vector manually (from /proc/self/auxv).
//
// Right now there are only 51 AT_* constants,
// so 64 should be enough until this implementation is replaced with musl.
static unsigned long __auxv_procfs[64];
static unsigned long __auxv_secure = 0;
// Common
static unsigned long * __auxv_environ = NULL;
static void * volatile getauxval_func;
static unsigned long __auxv_init_environ(unsigned long type);
//
// auxv from procfs interface
//
/// read(2) wrapper that transparently restarts the call when it is interrupted (EINTR).
///
/// Any other read error is treated as fatal: the error is reported via perror() and the
/// process is aborted.
///
/// @param fd     descriptor to read from
/// @param buf    destination buffer
/// @param count  maximum number of bytes to read
/// @return       number of bytes read (0 on end of file); never returns -1
ssize_t __retry_read(int fd, void * buf, size_t count)
{
    ssize_t bytes_read;

    // Keep retrying for as long as the call fails only because of an interruption.
    do
    {
        bytes_read = read(fd, buf, count);
    } while (bytes_read == -1 && errno == EINTR);

    if (bytes_read == -1)
    {
        perror("Cannot read /proc/self/auxv");
        abort();
    }

    return bytes_read;
}
/// Look up an auxiliary vector entry in the table filled from /proc/self/auxv.
///
/// @param type  AT_* constant to look up
/// @return      the cached __auxv_secure for AT_SECURE; otherwise the __auxv_procfs slot for
///              `type`, or 0 with errno set to ENOENT when `type` does not fit in the table
unsigned long NO_SANITIZE_THREAD __getauxval_procfs(unsigned long type)
{
    // AT_SECURE is answered from its dedicated variable, not from the table.
    if (type == AT_SECURE)
        return __auxv_secure;

    if (type < ARRAY_SIZE(__auxv_procfs))
        return __auxv_procfs[type];

    // The requested type is outside of the table.
    errno = ENOENT;
    return 0;
}
/// First-call initializer of the procfs-based getauxval() implementation.
///
/// Reads /proc/self/auxv entry by entry into __auxv_procfs (indexed by the AT_* type),
/// caches the AT_SECURE value in __auxv_secure and then switches getauxval_func from this
/// initializer to __getauxval_procfs.
///
/// If /proc/self/auxv cannot be opened, getauxval_func is switched to __auxv_init_environ
/// instead and the request is delegated to it.
///
/// Aborts the process if an entry with a type that does not fit into __auxv_procfs is found.
///
/// @param type  AT_* constant requested by the caller
/// @return      the value for `type` as reported by __getauxval_procfs (or by
///              __auxv_init_environ in the fallback case)
static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type)
{
    // For debugging:
    // - od -t dL /proc/self/auxv
    // - LD_SHOW_AUX= ls
    int fd = open("/proc/self/auxv", O_RDONLY);
    // It is possible in case of:
    // - no procfs mounted
    // - on android you are not able to read it unless running from shell or debugging
    // - some other issues
    if (fd == -1)
    {
        // Fallback to environ.
        a_cas_p(&getauxval_func, (void *)__auxv_init_procfs, (void *)__auxv_init_environ);
        return __auxv_init_environ(type);
    }

    ElfW(auxv_t) aux;

    /// NOTE: sizeof(aux) is very small (less than PAGE_SIZE), so partial read should not be possible.
    _Static_assert(sizeof(aux) < 4096, "Unexpected sizeof(aux)");
    while (__retry_read(fd, &aux, sizeof(aux)) == sizeof(aux))
    {
        if (aux.a_type == AT_NULL)
        {
            break;
        }
        if (aux.a_type == AT_IGNORE || aux.a_type == AT_IGNOREPPC)
        {
            continue;
        }

        if (aux.a_type >= ARRAY_SIZE(__auxv_procfs))
        {
            // a_type is an unsigned integer, so print it as such.
            fprintf(stderr, "AT_* is out of range: %lu (maximum allowed is %zu)\n", (unsigned long)aux.a_type, ARRAY_SIZE(__auxv_procfs));
            abort();
        }

        /// The slot may already be filled here due to a race on initialization;
        /// in that case it is simply overwritten.
        __auxv_procfs[aux.a_type] = aux.a_un.a_val;
    }
    close(fd);

    // Take AT_SECURE directly from the table: __getauxval_procfs(AT_SECURE) returns
    // __auxv_secure itself, so going through it would never store the value that was read.
    _Static_assert(AT_SECURE < ARRAY_SIZE(__auxv_procfs), "AT_SECURE does not fit into __auxv_procfs");
    __auxv_secure = __auxv_procfs[AT_SECURE];

    // Now we've initialized __auxv_procfs, next time getauxval() will only call __getauxval_procfs().
    a_cas_p(&getauxval_func, (void *)__auxv_init_procfs, (void *)__getauxval_procfs);

    return __getauxval_procfs(type);
}
//
// auxv from environ interface
//
// NOTE: environ is available only after static initializers,
// so you cannot rely on this if you need getauxval() before that.
//
// A good example of such a user is the sanitizers; for example,
// LSan will not work with __auxv_init_environ(),
// since it needs getauxval() before that.
//
static size_t NO_SANITIZE_THREAD __find_auxv(unsigned long type)
{
size_t i;
for (i = 0; __auxv[i]; i += 2)
for (i = 0; __auxv_environ[i]; i += 2)
{
if (__auxv[i] == type)
if (__auxv_environ[i] == type)
{
return i + 1;
}
}
return (size_t) -1;
}
unsigned long __getauxval(unsigned long type)
unsigned long NO_SANITIZE_THREAD __getauxval_environ(unsigned long type)
{
if (type == AT_SECURE)
return __auxv_secure;
if (__auxv)
if (__auxv_environ)
{
size_t index = __find_auxv(type);
if (index != ((size_t) -1))
return __auxv[index];
return __auxv_environ[index];
}
errno = ENOENT;
return 0;
}
static void * volatile getauxval_func;
static unsigned long __auxv_init(unsigned long type)
static unsigned long NO_SANITIZE_THREAD __auxv_init_environ(unsigned long type)
{
if (!__environ)
{
// __environ is not initialized yet so we can't initialize __auxv right now.
// __environ is not initialized yet so we can't initialize __auxv_environ right now.
// That's normally occurred only when getauxval() is called from some sanitizer's internal code.
errno = ENOENT;
return 0;
}
// Initialize __auxv and __auxv_secure.
// Initialize __auxv_environ and __auxv_secure.
size_t i;
for (i = 0; __environ[i]; i++);
__auxv = (unsigned long *) (__environ + i + 1);
__auxv_environ = (unsigned long *) (__environ + i + 1);
size_t secure_idx = __find_auxv(AT_SECURE);
if (secure_idx != ((size_t) -1))
__auxv_secure = __auxv[secure_idx];
__auxv_secure = __auxv_environ[secure_idx];
// Now we've initialized __auxv, next time getauxval() will only call __get_auxval().
a_cas_p(&getauxval_func, (void *)__auxv_init, (void *)__getauxval);
// Now we need to switch to __getauxval_environ for all later calls, since
// everything is initialized.
a_cas_p(&getauxval_func, (void *)__auxv_init_environ, (void *)__getauxval_environ);
return __getauxval(type);
return __getauxval_environ(type);
}
// First time getauxval() will call __auxv_init().
static void * volatile getauxval_func = (void *)__auxv_init;
// Callchain:
// - __auxv_init_procfs -> __getauxval_environ
// - __auxv_init_procfs -> __auxv_init_environ -> __getauxval_environ
static void * volatile getauxval_func = (void *)__auxv_init_procfs;
unsigned long getauxval(unsigned long type)
{

View File

@ -156,8 +156,7 @@ endif()
add_contrib (sqlite-cmake sqlite-amalgamation)
add_contrib (s2geometry-cmake s2geometry)
add_contrib (base-x-cmake base-x)
add_contrib(c-ares-cmake c-ares)
add_contrib (c-ares-cmake c-ares)
add_contrib (qpl-cmake qpl)
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.

2
contrib/avro vendored

@ -1 +1 @@
Subproject commit e43c46e87fd32eafdc09471e95344555454c5ef8
Subproject commit 7832659ec986075d560f930c288e973c64679552

1
contrib/base-x vendored

@ -1 +0,0 @@
Subproject commit a85f98fb4ed52c2f4029a4b6ac1ef0bafdfc56f5

View File

@ -1,28 +0,0 @@
option (ENABLE_BASEX "Enable base-x" ${ENABLE_LIBRARIES})
if (NOT ENABLE_BASEX)
message(STATUS "Not using base-x")
return()
endif()
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base-x")
set (SRCS
${LIBRARY_DIR}/base_x.hh
${LIBRARY_DIR}/uinteger_t.hh
)
add_library(_base-x INTERFACE)
target_include_directories(_base-x SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/base-x")
if (XCODE OR XCODE_VERSION)
# https://gitlab.kitware.com/cmake/cmake/issues/17457
# Some native build systems may not like targets that have only object files, so consider adding at least one real source file
# This applies to Xcode.
if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "")
endif ()
target_sources(_base-x PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
endif ()
add_library(ch_contrib::base-x ALIAS _base-x)

2
contrib/grpc vendored

@ -1 +1 @@
Subproject commit 5e23e96c0c02e451dbb291cf9f66231d02b6cdb6
Subproject commit 3f975ecab377cd5f739af780566596128f17bb74

View File

@ -119,5 +119,20 @@ ENV GOCACHE=/workdir/
RUN mkdir /workdir && chmod 777 /workdir
WORKDIR /workdir
# FIXME: thread sanitizer is broken in clang-14, we have to build it with clang-13
# https://github.com/ClickHouse/ClickHouse/pull/39450
# https://github.com/google/sanitizers/issues/1540
# https://github.com/google/sanitizers/issues/1552
RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-13 main" >> \
/etc/apt/sources.list.d/clang.list \
&& apt-get update \
&& apt-get install \
clang-13 \
clang-tidy-13 \
--yes --no-install-recommends \
&& apt-get clean
COPY build.sh /
CMD ["bash", "-c", "/build.sh 2>&1"]

View File

@ -323,6 +323,7 @@ if __name__ == "__main__":
parser.add_argument(
"--compiler",
choices=(
"clang-13", # For TSAN builds, see #39450
"clang-14",
"clang-14-darwin",
"clang-14-darwin-aarch64",

View File

@ -7,29 +7,26 @@ set -x
# Thread Fuzzer allows to check more permutations of possible thread scheduling
# and find more potential issues.
#
# But under thread fuzzer, TSan build is too slow and this produces some flaky
# tests, so for now, as a temporary solution it had been disabled.
if ! test -f package_folder/clickhouse-server*tsan*.deb; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
fi
function install_packages()
{

View File

@ -0,0 +1,36 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.6.4.35-stable FIXME as compared to v22.6.3.35-stable
#### Build/Testing/Packaging Improvement
* Backported in [#38822](https://github.com/ClickHouse/ClickHouse/issues/38822): - Change `all|noarch` packages to architecture-dependent - Fix some documentation for it - Push aarch64|arm64 packages to artifactory and release assets - Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#38242](https://github.com/ClickHouse/ClickHouse/issues/38242): Fix possible crash in `Distributed` async insert in case of removing a replica from config. [#38029](https://github.com/ClickHouse/ClickHouse/pull/38029) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#38865](https://github.com/ClickHouse/ClickHouse/issues/38865): Fix s3 seekable reads with parallel read buffer. (Affected memory usage during query). Closes [#38258](https://github.com/ClickHouse/ClickHouse/issues/38258). [#38802](https://github.com/ClickHouse/ClickHouse/pull/38802) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#38853](https://github.com/ClickHouse/ClickHouse/issues/38853): Update `simdjson`. This fixes [#38621](https://github.com/ClickHouse/ClickHouse/issues/38621). [#38838](https://github.com/ClickHouse/ClickHouse/pull/38838) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#38942](https://github.com/ClickHouse/ClickHouse/issues/38942): - Fix settings profile with seconds unit. [#38896](https://github.com/ClickHouse/ClickHouse/pull/38896) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#39063](https://github.com/ClickHouse/ClickHouse/issues/39063): Any allocations inside OvercommitTracker may lead to deadlock. Logging was not very informative so it's easier just to remove logging. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794). [#39030](https://github.com/ClickHouse/ClickHouse/pull/39030) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#39077](https://github.com/ClickHouse/ClickHouse/issues/39077): Fix bug in filesystem cache that could happen in some corner case which coincided with cache capacity hitting the limit. Closes [#39066](https://github.com/ClickHouse/ClickHouse/issues/39066). [#39070](https://github.com/ClickHouse/ClickHouse/pull/39070) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#39151](https://github.com/ClickHouse/ClickHouse/issues/39151): Fix error `Block structure mismatch` which could happen for INSERT into table with attached MATERIALIZED VIEW and enabled setting `extremes = 1`. Closes [#29759](https://github.com/ClickHouse/ClickHouse/issues/29759) and [#38729](https://github.com/ClickHouse/ClickHouse/issues/38729). [#39125](https://github.com/ClickHouse/ClickHouse/pull/39125) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#39275](https://github.com/ClickHouse/ClickHouse/issues/39275): Fixed error `Not found column Type in block` in selects with `PREWHERE` and read-in-order optimizations. [#39157](https://github.com/ClickHouse/ClickHouse/pull/39157) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#39371](https://github.com/ClickHouse/ClickHouse/issues/39371): Declare RabbitMQ queue without default arguments `x-max-length` and `x-overflow`. [#39259](https://github.com/ClickHouse/ClickHouse/pull/39259) ([rnbondarenko](https://github.com/rnbondarenko)).
* Backported in [#39352](https://github.com/ClickHouse/ClickHouse/issues/39352): Fix incorrect fetch postgresql tables query for PostgreSQL database engine. Closes [#33502](https://github.com/ClickHouse/ClickHouse/issues/33502). [#39283](https://github.com/ClickHouse/ClickHouse/pull/39283) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### NO CL CATEGORY
* Backported in [#38685](https://github.com/ClickHouse/ClickHouse/issues/38685):. [#38449](https://github.com/ClickHouse/ClickHouse/pull/38449) ([Maksim Kita](https://github.com/kitaisreal)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Use native Map type for OpenTelemetry attributes [#38814](https://github.com/ClickHouse/ClickHouse/pull/38814) ([Ilya Yatsishin](https://github.com/qoega)).
* Retry docker buildx commands with progressive sleep in between [#38898](https://github.com/ClickHouse/ClickHouse/pull/38898) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add docker_server.py running to backport and release CIs [#39011](https://github.com/ClickHouse/ClickHouse/pull/39011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix meilisearch tests [#39110](https://github.com/ClickHouse/ClickHouse/pull/39110) ([Kseniia Sumarokova](https://github.com/kssenii)).

View File

@ -119,16 +119,9 @@ On CentOS, RedHat run `sudo yum install cmake ninja-build`.
If you use Arch or Gentoo, you probably already know how to install CMake.
For installing CMake and Ninja on Mac OS X first install Homebrew and then install everything else via brew:
/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
brew install cmake ninja
Next, check the version of CMake: `cmake --version`. If it is below 3.12, you should install a newer version from the website: https://cmake.org/download/.
## C++ Compiler {#c-compiler}
Compilers Clang starting from version 11 is supported for building ClickHouse.
The Clang compiler, starting from version 12, is supported for building ClickHouse.
Clang should be used instead of gcc. Though, our continuous integration (CI) platform runs checks for about a dozen build combinations.
@ -138,9 +131,6 @@ On Ubuntu/Debian you can use the automatic installation script (check [official
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
```
Mac OS X build is also supported. Just run `brew install llvm`
## The Building Process {#the-building-process}
Now that you are ready to build ClickHouse we recommend you to create a separate directory `build` inside `ClickHouse` that will contain all of the build artefacts:

View File

@ -692,9 +692,7 @@ auto s = std::string{"Hello"};
**1.** Virtual inheritance is not used.
**2.** Exception specifiers from C++03 are not used.
**3.** Constructs which have convenient syntactic sugar in modern C++, e.g.
**2.** Constructs which have convenient syntactic sugar in modern C++, e.g.
```
// Traditional way without syntactic sugar
@ -745,7 +743,7 @@ But other things being equal, cross-platform or portable code is preferred.
**2.** Language: C++20 (see the list of available [C++20 features](https://en.cppreference.com/w/cpp/compiler_support#C.2B.2B20_features)).
**3.** Compiler: `clang`. At this time (April 2021), the code is compiled using clang version 11. (It can also be compiled using `gcc` version 10, but it's untested and not suitable for production usage).
**3.** Compiler: `clang`. At the time of writing (July 2022), the code is compiled using clang version >= 12. (It can also be compiled using `gcc`, but it's untested and not suitable for production usage).
The standard library is used (`libc++`).
@ -755,7 +753,7 @@ The standard library is used (`libc++`).
The CPU instruction set is the minimum supported set among our servers. Currently, it is SSE 4.2.
**6.** Use `-Wall -Wextra -Werror` compilation flags. Also `-Weverything` is used with few exceptions.
**6.** Use `-Wall -Wextra -Werror -Weverything` compilation flags with a few exceptions.
**7.** Use static linking with all libraries except those that are difficult to connect to statically (see the output of the `ldd` command).

View File

@ -81,11 +81,11 @@ $ ./src/unit_tests_dbms --gtest_filter=LocalAddress*
## Performance Tests {#performance-tests}
Performance tests allow to measure and compare performance of some isolated part of ClickHouse on synthetic queries. Tests are located at `tests/performance`. Each test is represented by `.xml` file with description of test case. Tests are run with `docker/test/performance-comparison` tool . See the readme file for invocation.
Performance tests allow you to measure and compare the performance of some isolated part of ClickHouse on synthetic queries. Performance tests are located at `tests/performance/`. Each test is represented by an `.xml` file with a description of the test case. Tests are run with the `docker/test/performance-comparison` tool. See the readme file for invocation.
Each test runs one or multiple queries (possibly with combinations of parameters) in a loop.
If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. It always makes sense to use `perf top` or other `perf` tools during your tests.
If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. Also, it is recommended to write performance tests when you add or modify SQL functions which are relatively isolated and not too obscure. It always makes sense to use `perf top` or other `perf` tools during your tests.
## Test Tools and Scripts {#test-tools-and-scripts}

View File

@ -482,9 +482,9 @@ For example:
## Projections {#projections}
Projections are like [materialized views](../../../sql-reference/statements/create/view.md#materialized) but defined in part-level. It provides consistency guarantees along with automatic usage in queries.
Projections are an experimental feature. To enable them you must set the [allow_experimental_projection_optimization](../../../operations/settings/settings.md#allow-experimental-projection-optimization) to `1`. See also the [force_optimize_projection](../../../operations/settings/settings.md#force-optimize-projection) setting.
:::note
When you are implementing projections you should also consider the [force_optimize_projection](../../../operations/settings/settings.md#force-optimize-projection) setting.
:::
Projections are not supported in the `SELECT` statements with the [FINAL](../../../sql-reference/statements/select/from.md#select-from-final) modifier.
### Projection Query {#projection-query}

View File

@ -194,18 +194,25 @@ Differs from the `TabSeparated` format in that the column names are written in t
During parsing, the first row is expected to contain the column names. You can use column names to determine their position and to check their correctness.
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
This format is also available under the name `TSVWithNames`.
## TabSeparatedWithNamesAndTypes {#tabseparatedwithnamesandtypes}
Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row.
The first row with names is processed the same way as in `TabSeparatedWithNames` format.
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
This format is also available under the name `TSVWithNamesAndTypes`.
@ -451,10 +458,24 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## CSVWithNamesAndTypes {#csvwithnamesandtypes}
Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
## CustomSeparated {#format-customseparated}
Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](../operations/settings/settings.md#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](../operations/settings/settings.md#format_custom_field_delimiter), [format_custom_row_before_delimiter](../operations/settings/settings.md#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](../operations/settings/settings.md#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](../operations/settings/settings.md#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](../operations/settings/settings.md#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](../operations/settings/settings.md#format_custom_result_after_delimiter) settings, not from format strings.
@ -465,10 +486,24 @@ There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [Templat
Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## CustomSeparatedWithNamesAndTypes {#customseparatedwithnamesandtypes}
Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
## SQLInsert {#sqlinsert}
Outputs data as a sequence of `INSERT INTO table (columns...) VALUES (...), (...) ...;` statements.
@ -911,18 +946,46 @@ Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yie
Differs from `JSONCompactEachRow` format in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## JSONCompactEachRowWithNamesAndTypes {#jsoncompacteachrowwithnamesandtypes}
Differs from `JSONCompactEachRow` format in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
## JSONCompactStringsEachRowWithNames {#jsoncompactstringseachrowwithnames}
Differs from `JSONCompactStringsEachRow` in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## JSONCompactStringsEachRowWithNamesAndTypes {#jsoncompactstringseachrowwithnamesandtypes}
Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
```json
["num", "str", "arr"]
["Int32", "String", "Array(UInt8)"]
@ -1199,6 +1262,12 @@ Similar to [RowBinary](#rowbinary), but with added header:
- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N)
- N `String`s specifying column names
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes}
Similar to [RowBinary](#rowbinary), but with added header:
@ -1207,6 +1276,14 @@ Similar to [RowBinary](#rowbinary), but with added header:
- N `String`s specifying column names
- N `String`s specifying column types
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
## Values {#data-format-values}
Prints every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in a decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces aren't inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../sql-reference/syntax.md) is represented as `NULL`.

View File

@ -67,7 +67,7 @@ Features:
### Grafana {#grafana}
[Grafana](https://grafana.com/grafana/plugins/vertamedia-clickhouse-datasource) is a platform for monitoring and visualization.
[Grafana](https://grafana.com/grafana/plugins/grafana-clickhouse-datasource/) is a platform for monitoring and visualization.
"Grafana allows you to query, visualize, alert on and understand your metrics no matter where they are stored. Create, explore, and share dashboards with your team and foster a data driven culture. Trusted and loved by the community" &mdash; grafana.com.

View File

@ -29,7 +29,7 @@ Structure of the `users` section:
<profile>profile_name</profile>
<quota>default</quota>
<default_database>default<default_database>
<default_database>default</default_database>
<databases>
<database_name>
<table_name>

View File

@ -302,18 +302,34 @@ Default value: `ALL`.
Specifies [JOIN](../../sql-reference/statements/select/join.md) algorithm.
Several algorithms can be specified, and an available one would be chosen for a particular query based on kind/strictness and table engine.
Possible values:
- `hash` — [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used.
- `partial_merge` — [Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) is used.
- `prefer_partial_merge` — ClickHouse always tries to use `merge` join if possible.
- `auto` — ClickHouse tries to change `hash` join to `merge` join on the fly to avoid out of memory.
- `default` — `hash` or `direct`, if possible (same as `direct,hash`)
Default value: `hash`.
- `hash` — [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
When using `hash` algorithm the right part of `JOIN` is uploaded into RAM.
- `parallel_hash` - a variation of `hash` join that splits the data into buckets and builds several hashtables instead of one concurrently to speed up this process.
When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM.
- `partial_merge` — a variation of the [sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join), where only the right table is fully sorted.
The `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported).
When using `partial_merge` algorithm, ClickHouse sorts the data and dumps it to the disk. The `partial_merge` algorithm in ClickHouse differs slightly from the classic realization. First, ClickHouse sorts the right table by joining keys in blocks and creates a min-max index for sorted blocks. Then it sorts parts of the left table by `join key` and joins them over the right table. The min-max index is also used to skip unneeded right table blocks.
- `direct` - can be applied when the right storage supports key-value requests.
The `direct` algorithm performs a lookup in the right table using rows from the left table as keys. It's supported only by special storage such as [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary) or [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) and only the `LEFT` and `INNER` JOINs.
- `auto` — try `hash` join and switch on the fly to another algorithm if the memory limit is violated.
- `full_sorting_merge` — [Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) with full sorting joined tables before joining.
- `prefer_partial_merge` — ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`.
When using `partial_merge` algorithm ClickHouse sorts the data and dumps it to the disk. The `merge` algorithm in ClickHouse differs a bit from the classic realization. First ClickHouse sorts the right table by [join key](../../sql-reference/statements/select/join.md#select-join) in blocks and creates min-max index for sorted blocks. Then it sorts parts of left table by `join key` and joins them over right table. The min-max index is also used to skip unneeded right table blocks.
## join_any_take_last_row {#settings-join_any_take_last_row}

View File

@ -4,7 +4,7 @@ sidebar_position: 6
# any
Selects the first encountered value.
Selects the first encountered (non-NULL) value, unless all rows have NULL values in that column.
The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate.
To get a determinate result, you can use the min or max function instead of any.

View File

@ -5,9 +5,9 @@ sidebar_label: Sources of External Dictionaries
# Sources of External Dictionaries
An external dictionary can be connected from many different sources.
An external dictionary can be connected to ClickHouse from many different sources.
If dictionary is configured using xml-file, the configuration looks like this:
If the dictionary is configured using an xml-file, the configuration looks like this:
``` xml
<clickhouse>
@ -24,7 +24,7 @@ If dictionary is configured using xml-file, the configuration looks like this:
</clickhouse>
```
In case of [DDL-query](../../../sql-reference/statements/create/dictionary.md), equal configuration will looks like:
In case of [DDL-query](../../../sql-reference/statements/create/dictionary.md), the configuration described above will look like:
``` sql
CREATE DICTIONARY dict_name (...)
@ -96,7 +96,7 @@ Setting fields:
- `path` — The absolute path to the file.
- `format` — The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
When dictionary with source `FILE` is created via DDL command (`CREATE DICTIONARY ...`), the source file needs to be located in `user_files` directory, to prevent DB users accessing arbitrary file on ClickHouse node.
When a dictionary with source `FILE` is created via DDL command (`CREATE DICTIONARY ...`), the source file needs to be located in the `user_files` directory to prevent DB users from accessing arbitrary files on the ClickHouse node.
**See Also**
@ -104,7 +104,7 @@ When dictionary with source `FILE` is created via DDL command (`CREATE DICTIONAR
## Executable File
Working with executable files depends on [how the dictionary is stored in memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable files STDIN. Otherwise, ClickHouse starts executable file and treats its output as dictionary data.
Working with executable files depends on [how the dictionary is stored in memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file's STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data.
Example of settings:
@ -120,22 +120,22 @@ Example of settings:
Setting fields:
- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `command` — The absolute path to the executable file, or the file name (if the command's directory is in the `PATH`).
- `format` — The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
- `command_termination_timeout` — executable script should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have `command_termination_timeout` seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `command_termination_timeout` — The executable script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file will have `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. `command_termination_timeout` is specified in seconds. Default value is 10. Optional parameter.
- `command_read_timeout` - Timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - Timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `0`. Optional parameter.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using a whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `0`. Optional parameter.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node.
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
## Executable Pool
Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](external-dicts-dict-layout.md#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, `complex_key_direct` layouts.
Executable pool allows loading data from a pool of processes. This source does not work with dictionary layouts that need to load all data from the source. Executable pool works if the dictionary [is stored](external-dicts-dict-layout.md#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts.
Executable pool will spawn pool of processes with specified command and keep them running until they exit. The program should read data from STDIN while it is available and output result to STDOUT, and it can wait for next block of data on STDIN. ClickHouse will not close STDIN after processing a block of data but will pipe another chunk of data when needed. The executable script should be ready for this way of data processing — it should poll STDIN and flush data to STDOUT early.
Executable pool will spawn a pool of processes with the specified command and keep them running until they exit. The program should read data from STDIN while it is available and output the result to STDOUT. It can wait for the next block of data on STDIN. ClickHouse will not close STDIN after processing a block of data, but will pipe another chunk of data when needed. The executable script should be ready for this way of data processing — it should poll STDIN and flush data to STDOUT early.
Example of settings:
@ -555,7 +555,11 @@ Setting fields:
The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::
MySQL can be connected on a local host via sockets. To do this, set `host` and `socket`.
:::note
There is no explicit `secure` parameter. When establishing an SSL connection, security is mandatory.
:::
MySQL can be connected to on a local host via sockets. To do this, set `host` and `socket`.
Example of settings:
@ -815,4 +819,4 @@ Setting fields:
:::note
The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::
:::

View File

@ -836,7 +836,7 @@ Result:
## now
Returns the current date and time.
Returns the current date and time at the moment of query analysis. The function is a constant expression.
**Syntax**
@ -884,14 +884,20 @@ Result:
└──────────────────────┘
```
## nowInBlock
Returns the current date and time at the moment of processing of each block of data. In contrast to the function `now`, it is not a constant expression, and the returned value will be different in different blocks for long-running queries.
It makes sense to use this function to generate the current time in long-running INSERT SELECT queries.
## today
Accepts zero arguments and returns the current date at one of the moments of request execution.
Accepts zero arguments and returns the current date at one of the moments of query analysis.
The same as toDate(now()).
## yesterday
Accepts zero arguments and returns yesterdays date at one of the moments of request execution.
Accepts zero arguments and returns yesterday's date at one of the moments of query analysis.
The same as today() - 1.
## timeSlot

View File

@ -494,22 +494,21 @@ If the s string is non-empty and does not contain the c character at
Returns the string s that was converted from the encoding in from to the encoding in to.
## Base58Encode(plaintext[, alphabet_name]), Base58Decode(encoded_text[, alphabet_name])
## Base58Encode(plaintext), Base58Decode(encoded_text)
Accepts a String and encodes/decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using specified alphabet.
Accepts a String and encodes/decodes it using the [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme with the "Bitcoin" alphabet.
**Syntax**
```sql
base58Encode(decoded[, alphabet_name])
base58Decode(encoded[, alphabet_name])
base58Encode(decoded)
base58Decode(encoded)
```
**Arguments**
- `decoded` — [String](../../sql-reference/data-types/string.md) column or constant.
- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid base58-encoded value, an exception is thrown.
- `alphabet_name` — String constant. Specifies alphabet used for encoding. Possible values: `gmp`, `bitcoin`, `ripple`, `flickr`. Default: `bitcoin`.
**Returned value**
@ -522,17 +521,17 @@ Type: [String](../../sql-reference/data-types/string.md).
Query:
``` sql
SELECT base58Encode('encode', 'flickr');
SELECT base58Decode('izCFiDUY', 'ripple');
SELECT base58Encode('Encoded');
SELECT base58Decode('3dc8KtHrwM');
```
Result:
```text
┌─base58Encode('encode', 'flickr')─┐
SvyTHb1D
┌─base58Encode('Encoded')─┐
3dc8KtHrwM
└──────────────────────────────────┘
┌─base58Decode('izCFiDUY', 'ripple')─┐
decode
┌─base58Decode('3dc8KtHrwM')─┐
Encoded
└────────────────────────────────────┘
```

View File

@ -96,7 +96,7 @@ For more information, see the link: [RE2](https://github.com/google/re2/blob/mas
## translate(s, from, to)
The function replaces characters in the string s in accordance with one-to-one character mapping defined by from and to strings. from and to must be ASCII strings of the same size. Non-ASCII characters in the original string are not modified.
The function replaces characters in the string s in accordance with one-to-one character mapping defined by from and to strings. from and to must be constant ASCII strings of the same size. Non-ASCII characters in the original string are not modified.
Example:
@ -112,7 +112,7 @@ SELECT translate('Hello, World!', 'delor', 'DELOR') AS res
## translateUTF8(string, from, to)
Similar to previous function, but works with UTF-8 arguments. from and to must be valid UTF-8 strings of the same size.
Similar to previous function, but works with UTF-8 arguments. from and to must be valid constant UTF-8 strings of the same size.
Example:

View File

@ -250,10 +250,12 @@ High compression levels are useful for asymmetric scenarios, like compress once,
#### DEFLATE_QPL
`DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library, which has dependency on Intel Hardware:
`DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply:
- DEFLATE_QPL is only supported on systems with AVX2/AVX512/IAA.
- DEFLATE_QPL-compressed data can only be transferred between nodes with AVX2/AVX512/IAA.
- DEFLATE_QPL is experimental and can only be used after setting configuration parameter `allow_experimental_codecs=1`.
- DEFLATE_QPL only works if ClickHouse was compiled with support for AVX2 or AVX512 instructions.
- DEFLATE_QPL works best if the system has an Intel® IAA (In-Memory Analytics Accelerator) offloading device.
- DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with support for AVX2/AVX512.
### Specialized Codecs

View File

@ -11,7 +11,7 @@ Compressed files are supported. Compression type is detected by the extension of
**Syntax**
```sql
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type [LEVEL level]]
SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [COMPRESSION type [LEVEL level]]
```
`file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
@ -23,6 +23,7 @@ SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type [LEVEL level]]
- This functionality is available in the [command-line client](../../../interfaces/cli.md) and [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Thus a query sent via [HTTP interface](../../../interfaces/http.md) will fail.
- The query will fail if a file with the same file name already exists.
- The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it.
- If `AND STDOUT` is mentioned in the query then the output that is written to the file is also displayed on standard output. If used with compression, the plaintext is displayed on standard output.
**Example**

View File

@ -36,7 +36,7 @@ Additional join types available in ClickHouse:
- `LEFT ANY JOIN`, `RIGHT ANY JOIN` and `INNER ANY JOIN`, partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types.
- `ASOF JOIN` and `LEFT ASOF JOIN`, joining sequences with a non-exact match. `ASOF JOIN` usage is described below.
:::note
:::note
When [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported).
:::
@ -64,7 +64,7 @@ Rows are joined if the whole complex condition is met. If the conditions are not
The `OR` operator inside the `ON` clause works using the hash join algorithm — for each `OR` argument with join keys for `JOIN`, a separate hash table is created, so memory consumption and query execution time grow linearly with an increase in the number of expressions `OR` of the `ON` clause.
:::note
:::note
If a condition refers to columns from different tables, then only the equality operator (`=`) is supported so far.
:::
@ -83,7 +83,7 @@ Consider `table_1` and `table_2`:
Query with one join key condition and an additional condition for `table_2`:
``` sql
SELECT name, text FROM table_1 LEFT OUTER JOIN table_2
SELECT name, text FROM table_1 LEFT OUTER JOIN table_2
ON table_1.Id = table_2.Id AND startsWith(table_2.text, 'Text');
```
@ -100,7 +100,7 @@ Note that the result contains the row with the name `C` and the empty text colum
Query with `INNER` type of a join and multiple conditions:
``` sql
SELECT name, text, scores FROM table_1 INNER JOIN table_2
SELECT name, text, scores FROM table_1 INNER JOIN table_2
ON table_1.Id = table_2.Id AND table_2.scores > 10 AND startsWith(table_2.text, 'Text');
```
@ -199,7 +199,7 @@ For example, consider the following tables:
`ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can't be joined.
:::note
:::note
`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
:::

View File

@ -18,7 +18,6 @@ sidebar_label: "Используемые сторонние библиотеки
| aws-c-common | [Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) |
| aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) |
| aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) |
| base58 | [MIT](https://github.com/ClickHouse/base-x/blob/3e58874643c087f57e82b0ff03825c933fab945a/LICENSE) |
| base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) |
| boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) |
| boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) |

View File

@ -15,16 +15,15 @@ $ make
Генерация данных:
:::danger "Внимание"
:::warning "Внимание"
-s 100 dbgen генерирует 600 миллионов строк (67 ГБ)
-s 1000 dbgen генерирует 6 миллиардов строк (занимает много времени)
:::
``` bash
$ ./dbgen -s 1000 -T c
$ ./dbgen -s 1000 -T l
$ ./dbgen -s 1000 -T p
$ ./dbgen -s 1000 -T s
$ ./dbgen -s 1000 -T d
```
Создание таблиц в Кликхауз:
@ -105,11 +104,10 @@ $ clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
``` sql
SET max_memory_usage = 20000000000;
CREATE TABLE lineorder_flat
ENGINE = MergeTree
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
ENGINE = MergeTree ORDER BY (LO_ORDERDATE, LO_ORDERKEY)
AS SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,

View File

@ -19,6 +19,7 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
| [CSV](#csv) | ✔ | ✔ |
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
@ -52,6 +53,7 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✔ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNames](#rowbinarywithnames) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ |
@ -171,6 +173,12 @@ SELECT * FROM nestedt FORMAT TSV
При парсинге первая строка должна содержать имена столбцов. Вы можете использовать имена столбцов, чтобы указать их порядок расположения, или чтобы проверить их корректность.
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
:::
Этот формат также доступен под именем `TSVWithNames`.
## TabSeparatedWithNamesAndTypes {#tabseparatedwithnamesandtypes}
@ -178,6 +186,14 @@ SELECT * FROM nestedt FORMAT TSV
Отличается от формата `TabSeparated` тем, что в первой строке пишутся имена столбцов, а во второй - типы столбцов.
При парсинге, первая и вторая строка полностью игнорируется.
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
Если включен параметр [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header),
типы из входных данных будут сравниваться с типами соответствующих столбцов из таблицы. В противном случае вторая строка будет пропущена.
:::
Этот формат также доступен под именем `TSVWithNamesAndTypes`.
## Template {#format-template}
@ -374,6 +390,24 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
Выводит также заголовок, аналогично [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
:::
## CSVWithNamesAndTypes {#csvwithnamesandtypes}
В первой строке пишутся имена столбцов, а во второй - типы столбцов, аналогично [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
Если включен параметр [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header),
типы из входных данных будут сравниваться с типами соответствующих столбцов из таблицы. В противном случае вторая строка будет пропущена.
:::
## CustomSeparated {#format-customseparated}
Аналогичен [Template](#format-template), но выводит (или считывает) все имена и типы столбцов, используя для них правило экранирования из настройки [format_custom_escaping_rule](../operations/settings/settings.md#format-custom-escaping-rule) и разделители из настроек [format_custom_field_delimiter](../operations/settings/settings.md#format-custom-field-delimiter), [format_custom_row_before_delimiter](../operations/settings/settings.md#format-custom-row-before-delimiter), [format_custom_row_after_delimiter](../operations/settings/settings.md#format-custom-row-after-delimiter), [format_custom_row_between_delimiter](../operations/settings/settings.md#format-custom-row-between-delimiter), [format_custom_result_before_delimiter](../operations/settings/settings.md#format-custom-result-before-delimiter) и [format_custom_result_after_delimiter](../operations/settings/settings.md#format-custom-result-after-delimiter), а не из форматных строк.
@ -384,10 +418,24 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
Выводит также заголовок с именами столбцов, аналогичен формату [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
:::
## CustomSeparatedWithNamesAndTypes {#customseparatedwithnamesandtypes}
Выводит также два заголовка с именами и типами столбцов, аналогичен формату [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
Если включен параметр [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header),
типы из входных данных будут сравниваться с типами соответствующих столбцов из таблицы. В противном случае вторая строка будет пропущена.
:::
## JSON {#json}
Выводит данные в формате JSON. Кроме таблицы с данными, также выводятся имена и типы столбцов, и некоторая дополнительная информация - общее количество выведенных строк, а также количество строк, которое могло бы быть выведено, если бы не было LIMIT-а. Пример:
@ -660,6 +708,14 @@ SELECT * FROM json_square_brackets;
Отличается от `JSONCompactEachRow`/`JSONCompactStringsEachRow` тем, что имена и типы столбцов записываются как первые две строки.
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
Если включен параметр [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header),
типы из входных данных будут сравниваться с типами соответствующих столбцов из таблицы. В противном случае вторая строка будет пропущена.
:::
```json
["'hello'", "multiply(42, number)", "range(5)"]
["String", "UInt64", "Array(UInt8)"]
@ -904,6 +960,20 @@ Array представлены как длина в формате varint (unsig
Для поддержки [NULL](../sql-reference/syntax.md#null-literal) перед каждым значением типа [Nullable](../sql-reference/data-types/nullable.md) следует байт содержащий 1 или 0. Если байт 1, то значение равно NULL, и этот байт интерпретируется как отдельное значение (т.е. после него следует значение следующего поля). Если байт 0, то после байта следует значение поля (не равно NULL).
## RowBinaryWithNames {#rowbinarywithnames}
То же самое, что [RowBinary](#rowbinary), но добавляется заголовок:
- Количество колонок - N, закодированное [LEB128](https://en.wikipedia.org/wiki/LEB128),
- N строк (`String`) с именами колонок.
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
:::
## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes}
То же самое что [RowBinary](#rowbinary), но добавляется заголовок:
@ -912,6 +982,14 @@ Array представлены как длина в формате varint (unsig
- N строк (`String`) с именами колонок,
- N строк (`String`) с типами колонок.
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
Если включен параметр [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header),
типы из входных данных будут сравниваться с типами соответствующих столбцов из таблицы. В противном случае вторая строка будет пропущена.
:::
## Values {#data-format-values}
Выводит каждую строку в скобках. Строки разделены запятыми. После последней строки запятой нет. Значения внутри скобок также разделены запятыми. Числа выводятся в десятичном виде без кавычек. Массивы выводятся в квадратных скобках. Строки, даты, даты-с-временем выводятся в кавычках. Правила экранирования и особенности парсинга аналогичны формату [TabSeparated](#tabseparated). При форматировании, лишние пробелы не ставятся, а при парсинге - допустимы и пропускаются (за исключением пробелов внутри значений типа массив, которые недопустимы). [NULL](../sql-reference/syntax.md) представляется как `NULL`.

View File

@ -527,7 +527,7 @@ SELECT * FROM table_with_enum_column_for_tsv_insert;
- [Использование вложенных структур](../../interfaces/formats.md#jsoneachrow-nested) при работе с форматом `JSONEachRow`.
## input_format_with_names_use_header {#settings-input-format-with-names-use-header}
## input_format_with_names_use_header {#input_format_with_names_use_header}
Включает или отключает проверку порядка столбцов при вставке данных.
@ -535,8 +535,38 @@ SELECT * FROM table_with_enum_column_for_tsv_insert;
Поддерживаемые форматы:
- [CSVWithNames](../../interfaces/formats.md#csvwithnames)
- [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames)
- [CSVWithNames](../../interfaces/formats.md#csvwithnames)
- [CSVWithNamesAndTypes](../../interfaces/formats.md#csvwithnamesandtypes)
- [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames)
- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md#tabseparatedwithnamesandtypes)
- [JSONCompactEachRowWithNames](../../interfaces/formats.md#jsoncompacteachrowwithnames)
- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompacteachrowwithnamesandtypes)
- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md#jsoncompactstringseachrowwithnames)
- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompactstringseachrowwithnamesandtypes)
- [RowBinaryWithNames](../../interfaces/formats.md#rowbinarywithnames)
- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes)
- [CustomSeparatedWithNames](../../interfaces/formats.md#customseparatedwithnames)
- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md#customseparatedwithnamesandtypes)
Возможные значения:
- 0 — выключена.
- 1 — включена.
Значение по умолчанию: 1.
## input_format_with_types_use_header {#input_format_with_types_use_header}
Определяет, должен ли синтаксический анализатор формата проверять, соответствуют ли типы данных из входных данных типам данных из целевой таблицы.
Поддерживаемые форматы:
- [CSVWithNamesAndTypes](../../interfaces/formats.md#csvwithnamesandtypes)
- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md#tabseparatedwithnamesandtypes)
- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompacteachrowwithnamesandtypes)
- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompactstringseachrowwithnamesandtypes)
- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes)
- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md#customseparatedwithnamesandtypes)
Возможные значения:
@ -626,8 +656,9 @@ ClickHouse может парсить только базовый формат `Y
Изменяет поведение операций, выполняемых со строгостью `ANY`.
:::danger "Внимание"
:::warning "Внимание"
Настройка применяется только для операций `JOIN`, выполняемых над таблицами с движком [Join](../../engines/table-engines/special/join.md).
:::
Возможные значения:
@ -2082,8 +2113,9 @@ SELECT * FROM test_table
Устанавливает приоритет ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) для потоков, исполняющих запросы. Планировщик ОС учитывает эти приоритеты при выборе следующего потока для исполнения на доступном ядре CPU.
:::danger "Предупреждение"
:::warning "Предупреждение"
Для использования этой настройки необходимо установить свойство `CAP_SYS_NICE`. Пакет `clickhouse-server` устанавливает его во время инсталляции. Некоторые виртуальные окружения не позволяют установить `CAP_SYS_NICE`. В этом случае, `clickhouse-server` выводит сообщение при запуске.
:::
Допустимые значения:

View File

@ -5,7 +5,7 @@ sidebar_label: AggregateFunction
# AggregateFunction {#data-type-aggregatefunction}
Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(…), и быть записано в таблицу обычно посредством [материализованного представления] (../../sql-reference/statements/create.md#create-view). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`.
Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(…), и быть записано в таблицу обычно посредством [материализованного представления](../../sql-reference/statements/create/view.md). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные, необходимо использовать те же агрегатные функции с суффиксом `-Merge`.
`AggregateFunction(name, types_of_arguments…)` — параметрический тип данных.
@ -63,5 +63,4 @@ SELECT uniqMerge(state) FROM (SELECT uniqState(UserID) AS state FROM table GROUP
## Пример использования {#primer-ispolzovaniia}
Смотрите в описании движка [AggregatingMergeTree](../../sql-reference/data-types/aggregatefunction.md).
Смотрите в описании движка [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md).

View File

@ -542,6 +542,7 @@ SOURCE(MYSQL(
:::info "Примечание"
Поля `table` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `table` или `query`.
Явный параметр `secure` отсутствует. Автоматически поддержана работа в обоих случаях: когда установка SSL-соединения необходима и когда нет.
MySQL можно подключить на локальном хосте через сокеты, для этого необходимо задать `host` и `socket`.

View File

@ -490,22 +490,21 @@ SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY (key1, key2);
Возвращает сконвертированную из кодировки from в кодировку to строку s.
## Base58Encode(plaintext[, alphabet_name]), Base58Decode(plaintext[, alphabet_name]) {#base58}
## Base58Encode(plaintext), Base58Decode(encoded_text) {#base58}
Принимает на вход строку или колонку строк и кодирует/раскодирует их с помощью схемы кодирования [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) с использованием указанного алфавита.
Принимает на вход строку или колонку строк и кодирует/раскодирует их с помощью схемы кодирования [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) с использованием стандартного алфавита Bitcoin.
**Синтаксис**
```sql
base58Encode(decoded[, alphabet_name])
base58Decode(encoded[, alphabet_name])
encodeBase58(decoded)
decodeBase58(encoded)
```
**Аргументы**
- `decoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md).
- `encoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md). Если входная строка не является корректным кодом для какой-либо другой строки, возникнет исключение `1001`.
- `alphabet_name` — Строковая константа. Указывает алфавит, для которого необходимо получить код. Может принимать одно из следующих значений: `gmp`, `bitcoin`, `ripple`, `flickr`. По умолчанию: `bitcoin`.
**Возвращаемое значение**
@ -518,16 +517,16 @@ base58Decode(encoded[, alphabet_name])
Запрос:
``` sql
SELECT base58Encode('encode', 'flickr');
SELECT base58Decode('izCFiDUY', 'ripple');
SELECT encodeBase58('encode');
SELECT decodeBase58('izCFiDUY');
```
Результат:
```text
┌─base58Encode('encode', 'flickr')─┐
┌─encodeBase58('encode')───────────┐
│ SvyTHb1D │
└──────────────────────────────────┘
┌─base58Decode('izCFiDUY', 'ripple')─┐
┌─decodeBase58('izCFiDUY')───────────┐
│ decode │
└────────────────────────────────────┘
```

View File

@ -85,7 +85,7 @@ SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res
## translate(s, from, to)
Данная функция заменяет символы в строке s в соответствии с поэлементным отображением определяемым строками from и to. from и to должны быть корректными ASCII строками одного размера. Не ASCII символы в оригинальной строке не изменяются.
Данная функция заменяет символы в строке s в соответствии с поэлементным отображением, определяемым строками from и to. from и to должны быть корректными константными ASCII-строками одного размера. Не-ASCII символы в оригинальной строке не изменяются.
Пример:
@ -101,7 +101,7 @@ SELECT translate('Hello, World!', 'delor', 'DELOR') AS res
## translateUTF8(string, from, to)
Аналогично предыдущей функции, но работает со строками, состоящими из UTF-8 символов. from и to должны быть корректными UTF-8 строками одного размера.
Аналогично предыдущей функции, но работает со строками, состоящими из UTF-8 символов. from и to должны быть корректными константными UTF-8 строками одного размера.
Пример:

View File

@ -15,15 +15,14 @@ $ make
开始生成数据:
!!! warning "注意"
:::warning "注意"
使用 `-s 100` 时,dbgen 将生成 6 亿行数据(67GB);如果使用 `-s 1000`,它会生成 60 亿行数据(这需要很多时间)
:::
```bash
$ ./dbgen -s 1000 -T c
$ ./dbgen -s 1000 -T l
$ ./dbgen -s 1000 -T p
$ ./dbgen -s 1000 -T s
$ ./dbgen -s 1000 -T d
```
在 ClickHouse 中创建数据表:
@ -106,10 +105,8 @@ $ clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
SET max_memory_usage = 20000000000;
CREATE TABLE lineorder_flat
ENGINE = MergeTree
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
ENGINE = MergeTree ORDER BY (LO_ORDERDATE, LO_ORDERKEY)
AS SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,

View File

@ -18,7 +18,7 @@ option (ENABLE_CLICKHOUSE_SERVER "Server mode (main mode)" ${ENABLE_CLICKHOUSE_A
option (ENABLE_CLICKHOUSE_CLIENT "Client mode (interactive tui/shell that connects to the server)"
${ENABLE_CLICKHOUSE_ALL})
if (CLICKHOUSE_SPLIT_BINARY OR NOT ENABLE_UTILS)
if (CLICKHOUSE_SPLIT_BINARY)
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" OFF)
else ()
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" ON)
@ -434,6 +434,9 @@ else ()
endif ()
set (CLICKHOUSE_BUNDLE)
# When the self-extracting executable is enabled, add `self-extracting` to the CLICKHOUSE_BUNDLE list.
if (ENABLE_CLICKHOUSE_SELF_EXTRACTING)
list(APPEND CLICKHOUSE_BUNDLE self-extracting)
endif ()
if (ENABLE_CLICKHOUSE_SERVER)
add_custom_target (clickhouse-server ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-server DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-server" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)

View File

@ -39,14 +39,19 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv)
try
{
DB::KeeperStorage storage(500, "", true);
auto keeper_context = std::make_shared<KeeperContext>();
keeper_context->digest_enabled = true;
DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false);
DB::deserializeKeeperStorageFromSnapshotsDir(storage, options["zookeeper-snapshots-dir"].as<std::string>(), logger);
storage.initializeSystemNodes();
DB::deserializeLogsAndApplyToStorage(storage, options["zookeeper-logs-dir"].as<std::string>(), logger);
DB::SnapshotMetadataPtr snapshot_meta = std::make_shared<DB::SnapshotMetadata>(storage.getZXID(), 1, std::make_shared<nuraft::cluster_config>());
DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta);
DB::KeeperSnapshotManager manager(options["output-dir"].as<std::string>(), 1);
DB::KeeperSnapshotManager manager(options["output-dir"].as<std::string>(), 1, keeper_context);
auto snp = manager.serializeSnapshotToBuffer(snapshot);
auto path = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID());
std::cout << "Snapshot serialized to path:" << path << std::endl;

View File

@ -34,6 +34,10 @@
#include <base/bit_cast.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <memory>
#include <cmath>
#include <unistd.h>
@ -95,6 +99,9 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_SEEK_THROUGH_FILE;
extern const int UNKNOWN_FORMAT_VERSION;
extern const int INCORRECT_NUMBER_OF_COLUMNS;
extern const int TYPE_MISMATCH;
}
@ -115,6 +122,12 @@ public:
/// Deterministically change seed to some other value. This can be used to generate more values than were in source.
virtual void updateSeed() = 0;
/// Save the state of the model into `out`.
/// Binary, platform-dependent, version-dependent serialization.
virtual void serialize(WriteBuffer & out) const = 0;
/// Read the state of the model from `in`; the counterpart of serialize().
virtual void deserialize(ReadBuffer & in) = 0;
virtual ~IModel() = default;
};
@ -189,6 +202,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
/// train() and finalize() are no-ops for this model, so there is nothing to write or read here.
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -230,6 +245,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
/// train() and finalize() are no-ops for this model, so there is nothing to write or read here.
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -279,6 +296,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
/// train() and finalize() are no-ops for this model, so there is nothing to write or read here.
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -311,6 +330,8 @@ class IdentityModel : public IModel
public:
void train(const IColumn &) override {}
void finalize() override {}
/// IdentityModel does nothing in train() and finalize(), so there is nothing to write or read here.
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -395,6 +416,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
/// train() and finalize() are no-ops for this model, so there is nothing to write or read here.
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -431,6 +454,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
/// train() and finalize() are no-ops for this model, so there is nothing to write or read here.
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -469,6 +494,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
/// train() and finalize() are no-ops for this model, so there is nothing to write or read here.
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -512,6 +539,26 @@ struct MarkovModelParameters
size_t frequency_add;
double frequency_desaturate;
size_t determinator_sliding_window_size;
/// Writes every parameter to `out` in binary form.
/// The order of the fields is fixed; deserialize() reads them back in exactly the same order.
void serialize(WriteBuffer & out) const
{
    const auto put = [&out](const auto & field) { writeBinary(field, out); };

    put(order);
    put(frequency_cutoff);
    put(num_buckets_cutoff);
    put(frequency_add);
    put(frequency_desaturate);
    put(determinator_sliding_window_size);
}
/// Reads every parameter from `in`, overwriting the current values.
/// The fields are read in the same order in which serialize() writes them.
void deserialize(ReadBuffer & in)
{
    const auto get = [&in](auto & field) { readBinary(field, in); };

    get(order);
    get(frequency_cutoff);
    get(num_buckets_cutoff);
    get(frequency_add);
    get(frequency_desaturate);
    get(determinator_sliding_window_size);
}
};
@ -565,6 +612,39 @@ private:
return END;
}
/// Writes `total` and `count_end`, then the number of buckets, then each bucket as a (key, value) pair.
void serialize(WriteBuffer & out) const
{
    writeBinary(total, out);
    writeBinary(count_end, out);

    /// The bucket count is written before the pairs so that deserialize() knows how many to read.
    const size_t num_buckets = buckets.size();
    writeBinary(num_buckets, out);

    for (const auto & [key, value] : buckets)
    {
        writeBinary(key, out);
        writeBinary(value, out);
    }
}
/// Reads the data in the layout produced by serialize(): `total`, `count_end`,
/// the number of buckets and then that many (key, value) pairs.
/// Note: `buckets` is not cleared first; the pairs are emplaced into whatever it already holds.
void deserialize(ReadBuffer & in)
{
    readBinary(total, in);
    readBinary(count_end, in);

    size_t num_buckets = 0;
    readBinary(num_buckets, in);
    buckets.reserve(num_buckets);

    for (size_t remaining = num_buckets; remaining > 0; --remaining)
    {
        Buckets::value_type bucket;
        auto & [key, value] = bucket;
        readBinary(key, in);
        readBinary(value, in);
        buckets.emplace(std::move(bucket));
    }
}
};
using Table = HashMap<NGramHash, Histogram, TrivialHash>;
@ -621,6 +701,37 @@ public:
explicit MarkovModel(MarkovModelParameters params_)
: params(std::move(params_)), code_points(params.order, BEGIN) {}
/// Writes the model parameters, then the number of table entries,
/// then each entry as its key followed by the serialized histogram.
void serialize(WriteBuffer & out) const
{
    params.serialize(out);

    const size_t num_entries = table.size();
    writeBinary(num_entries, out);

    for (const auto & cell : table)
    {
        const auto & ngram_hash = cell.getKey();
        const auto & histogram = cell.getMapped();

        writeBinary(ngram_hash, out);
        histogram.serialize(out);
    }
}
/// Reads the data in the layout produced by serialize(): the parameters (which replace
/// the ones passed to the constructor), the number of table entries and the entries themselves.
void deserialize(ReadBuffer & in)
{
    params.deserialize(in);

    size_t num_entries = 0;
    readBinary(num_entries, in);
    table.reserve(num_entries);

    for (size_t remaining = num_entries; remaining > 0; --remaining)
    {
        NGramHash ngram_hash{};
        readBinary(ngram_hash, in);

        /// Fill the histogram stored in the table under this key.
        table[ngram_hash].deserialize(in);
    }
}
void consume(const char * data, size_t size)
{
/// First 'order' number of code points are pre-filled with BEGIN.
@ -655,7 +766,6 @@ public:
}
}
void finalize()
{
if (params.num_buckets_cutoff)
@ -878,6 +988,16 @@ public:
{
seed = hash(seed);
}
/// Saves the model by delegating to `markov_model`. Only the Markov model is written; `seed` is not.
void serialize(WriteBuffer & out) const override
{
markov_model.serialize(out);
}
/// Loads the model by delegating to `markov_model`. `seed` is not read from `in` and keeps its current value.
void deserialize(ReadBuffer & in) override
{
markov_model.deserialize(in);
}
};
@ -916,6 +1036,16 @@ public:
{
nested_model->updateSeed();
}
/// This wrapper writes nothing of its own: the call is forwarded to `nested_model`.
void serialize(WriteBuffer & out) const override
{
nested_model->serialize(out);
}
/// This wrapper reads nothing of its own: the call is forwarded to `nested_model`.
void deserialize(ReadBuffer & in) override
{
nested_model->deserialize(in);
}
};
@ -954,6 +1084,16 @@ public:
{
nested_model->updateSeed();
}
/// This wrapper writes nothing of its own: the call is forwarded to `nested_model`.
void serialize(WriteBuffer & out) const override
{
nested_model->serialize(out);
}
/// This wrapper reads nothing of its own: the call is forwarded to `nested_model`.
void deserialize(ReadBuffer & in) override
{
nested_model->deserialize(in);
}
};
@ -1046,6 +1186,18 @@ public:
for (auto & model : models)
model->updateSeed();
}
/// Serializes every model in `models`, one after another, in iteration order.
/// Neither the number of models nor any per-model marker is written here, so the data
/// can only be read back by deserialize() on an object holding the same sequence of models.
void serialize(WriteBuffer & out) const
{
for (const auto & model : models)
model->serialize(out);
}
/// Deserializes every model in `models`, one after another, in iteration order.
/// The models must already exist: this only restores their state, it does not create them.
void deserialize(ReadBuffer & in)
{
for (auto & model : models)
model->deserialize(in);
}
};
}
@ -1068,8 +1220,10 @@ try
("input-format", po::value<std::string>(), "input format of the initial table data")
("output-format", po::value<std::string>(), "default output format")
("seed", po::value<std::string>(), "seed (arbitrary string), must be random string with at least 10 bytes length; note that a seed for each column is derived from this seed and a column name: you can obfuscate data for different tables and as long as you use identical seed and identical column names, the data for corresponding non-text columns for different tables will be transformed in the same way, so the data for different tables can be JOINed after obfuscation")
("limit", po::value<UInt64>(), "if specified - stop after generating that number of rows")
("limit", po::value<UInt64>(), "if specified - stop after generating that number of rows; the limit can be also greater than the number of source dataset - in this case it will process the dataset in a loop more than one time, using different seeds on every iteration, generating result as large as needed")
("silent", po::value<bool>()->default_value(false), "don't print information messages to stderr")
("save", po::value<std::string>(), "save the models after training to the specified file. You can use --limit 0 to skip the generation step. The file is using binary, platform-dependent, opaque serialization format. The model parameters are saved, while the seed is not.")
("load", po::value<std::string>(), "load the models instead of training from the specified file. The table structure must match the saved file. The seed should be specified separately, while other model parameters are loaded.")
("order", po::value<UInt64>()->default_value(5), "order of markov model to generate strings")
("frequency-cutoff", po::value<UInt64>()->default_value(5), "frequency cutoff for markov model: remove all buckets with count less than specified")
("num-buckets-cutoff", po::value<UInt64>()->default_value(0), "cutoff for number of different possible continuations for a context: remove all histograms with less than specified number of buckets")
@ -1096,12 +1250,26 @@ try
return 0;
}
if (options.count("save") && options.count("load"))
{
std::cerr << "The options --save and --load cannot be used together.\n";
return 1;
}
UInt64 seed = sipHash64(options["seed"].as<std::string>());
std::string structure = options["structure"].as<std::string>();
std::string input_format = options["input-format"].as<std::string>();
std::string output_format = options["output-format"].as<std::string>();
std::string load_from_file;
std::string save_into_file;
if (options.count("load"))
load_from_file = options["load"].as<std::string>();
else if (options.count("save"))
save_into_file = options["save"].as<std::string>();
UInt64 limit = 0;
if (options.count("limit"))
limit = options["limit"].as<UInt64>();
@ -1117,7 +1285,7 @@ try
markov_model_params.frequency_desaturate = options["frequency-desaturate"].as<double>();
markov_model_params.determinator_sliding_window_size = options["determinator-sliding-window-size"].as<UInt64>();
// Create header block
/// Create the header block
std::vector<std::string> structure_vals;
boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on);
@ -1143,6 +1311,7 @@ try
ReadBufferFromFileDescriptor file_in(STDIN_FILENO);
WriteBufferFromFileDescriptor file_out(STDOUT_FILENO);
if (load_from_file.empty())
{
/// stdin must be seekable
auto res = lseek(file_in.getFD(), 0, SEEK_SET);
@ -1156,6 +1325,9 @@ try
/// Train step
UInt64 source_rows = 0;
bool rewind_needed = false;
if (load_from_file.empty())
{
if (!silent)
std::cerr << "Training models\n";
@ -1173,11 +1345,71 @@ try
if (!silent)
std::cerr << "Processed " << source_rows << " rows\n";
}
obfuscator.finalize();
rewind_needed = true;
}
else
{
if (!silent)
std::cerr << "Loading models\n";
ReadBufferFromFile model_file_in(load_from_file);
CompressedReadBuffer model_in(model_file_in);
UInt8 version = 0;
readBinary(version, model_in);
if (version != 0)
throw Exception("Unknown version of the model file", ErrorCodes::UNKNOWN_FORMAT_VERSION);
readBinary(source_rows, model_in);
Names data_types = header.getDataTypeNames();
size_t header_size = 0;
readBinary(header_size, model_in);
if (header_size != data_types.size())
throw Exception("The saved model was created for different number of columns", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS);
for (size_t i = 0; i < header_size; ++i)
{
String type;
readBinary(type, model_in);
if (type != data_types[i])
throw Exception("The saved model was created for different types of columns", ErrorCodes::TYPE_MISMATCH);
}
obfuscator.deserialize(model_in);
}
obfuscator.finalize();
if (!save_into_file.empty())
{
if (!silent)
std::cerr << "Saving models\n";
if (!limit)
WriteBufferFromFile model_file_out(save_into_file);
CompressedWriteBuffer model_out(model_file_out, CompressionCodecFactory::instance().get("ZSTD", 1));
/// You can change version on format change, it is currently set to zero.
UInt8 version = 0;
writeBinary(version, model_out);
writeBinary(source_rows, model_out);
/// We are writing the data types for validation, because the models serialization depends on the data types.
Names data_types = header.getDataTypeNames();
size_t header_size = data_types.size();
writeBinary(header_size, model_out);
for (const auto & type : data_types)
writeBinary(type, model_out);
/// Write the models.
obfuscator.serialize(model_out);
model_out.finalize();
model_file_out.finalize();
}
if (!options.count("limit"))
limit = source_rows;
/// Generation step
@ -1187,7 +1419,8 @@ try
if (!silent)
std::cerr << "Generating data\n";
file_in.seek(0, SEEK_SET);
if (rewind_needed)
file_in.rewind();
Pipe pipe(context->getInputFormat(input_format, file_in, header, max_block_size));
@ -1220,6 +1453,7 @@ try
out_executor.finish();
obfuscator.updateSeed();
rewind_needed = true;
}
return 0;

View File

@ -1,6 +1,18 @@
# Choose the compressor binary used to pack the self-extracting executable.
# If the host system name or processor differs from the target one (cross-compilation),
# use `pre_compressor` from the `native` build tree and pass it the path to the
# decompressor from the regular build tree via `--decompressor=`.
# Otherwise use the `compressor` from the regular build tree; DECOMPRESSOR is not set in that branch.
if (NOT(
CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME
AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR
)
)
set (COMPRESSOR "${CMAKE_BINARY_DIR}/native/utils/self-extracting-executable/pre_compressor")
set (DECOMPRESSOR "--decompressor=${CMAKE_BINARY_DIR}/utils/self-extracting-executable/decompressor")
else ()
set (COMPRESSOR "${CMAKE_BINARY_DIR}/utils/self-extracting-executable/compressor")
endif ()
add_custom_target (self-extracting ALL
${CMAKE_COMMAND} -E remove clickhouse
COMMAND ${CMAKE_BINARY_DIR}/utils/self-extracting-executable/compressor clickhouse ../clickhouse
COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse ../clickhouse
DEPENDS clickhouse compressor
)

View File

@ -13,20 +13,20 @@ using FileInfo = IBackupCoordination::FileInfo;
BackupCoordinationLocal::BackupCoordinationLocal() = default;
BackupCoordinationLocal::~BackupCoordinationLocal() = default;
void BackupCoordinationLocal::setStatus(const String &, const String &, const String &)
void BackupCoordinationLocal::setStage(const String &, const String &, const String &)
{
}
void BackupCoordinationLocal::setErrorStatus(const String &, const Exception &)
void BackupCoordinationLocal::setError(const String &, const Exception &)
{
}
Strings BackupCoordinationLocal::waitStatus(const Strings &, const String &)
Strings BackupCoordinationLocal::waitForStage(const Strings &, const String &)
{
return {};
}
Strings BackupCoordinationLocal::waitStatusFor(const Strings &, const String &, UInt64)
Strings BackupCoordinationLocal::waitForStage(const Strings &, const String &, std::chrono::milliseconds)
{
return {};
}

View File

@ -20,10 +20,10 @@ public:
BackupCoordinationLocal();
~BackupCoordinationLocal() override;
void setStatus(const String & current_host, const String & new_status, const String & message) override;
void setErrorStatus(const String & current_host, const Exception & exception) override;
Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) override;
Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) override;
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;

View File

@ -165,55 +165,94 @@ namespace
constexpr size_t NUM_ATTEMPTS = 10;
}
BackupCoordinationRemote::BackupCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
BackupCoordinationRemote::BackupCoordinationRemote(
const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, status_sync(zookeeper_path_ + "/status", get_zookeeper_, &Poco::Logger::get("BackupCoordination"))
, remove_zk_nodes_in_destructor(remove_zk_nodes_in_destructor_)
{
createRootNodes();
stage_sync.emplace(
zookeeper_path_ + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("BackupCoordination"));
}
BackupCoordinationRemote::~BackupCoordinationRemote() = default;
/// Removes the coordination nodes if `remove_zk_nodes_in_destructor` is set.
/// Exceptions from the removal are logged and swallowed so that the destructor never throws.
BackupCoordinationRemote::~BackupCoordinationRemote()
{
    if (!remove_zk_nodes_in_destructor)
        return;

    try
    {
        removeAllNodes();
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }
}
/// Returns the ZooKeeper session to use: locks `mutex` and delegates to getZooKeeperNoLock().
zkutil::ZooKeeperPtr BackupCoordinationRemote::getZooKeeper() const
{
std::lock_guard lock{mutex};
return getZooKeeperNoLock();
}
/// Returns the cached ZooKeeper session, obtaining a new one if there is none yet or the cached one has expired.
/// Does not lock anything itself (NOTE(review): presumably the caller is expected to hold `mutex` - confirm).
zkutil::ZooKeeperPtr BackupCoordinationRemote::getZooKeeperNoLock() const
{
    if (zookeeper && !zookeeper->expired())
        return zookeeper;

    zookeeper = get_zookeeper();

    /// It's possible that we connected to different [Zoo]Keeper instance
    /// so we may read a bit stale state.
    zookeeper->sync(zookeeper_path);

    return zookeeper;
}
void BackupCoordinationRemote::createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_part_names", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_mutations", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_data_paths", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_access", "");
zookeeper->createIfNotExists(zookeeper_path + "/file_names", "");
zookeeper->createIfNotExists(zookeeper_path + "/file_infos", "");
zookeeper->createIfNotExists(zookeeper_path + "/archive_suffixes", "");
auto zk = getZooKeeper();
zk->createAncestors(zookeeper_path);
zk->createIfNotExists(zookeeper_path, "");
zk->createIfNotExists(zookeeper_path + "/repl_part_names", "");
zk->createIfNotExists(zookeeper_path + "/repl_mutations", "");
zk->createIfNotExists(zookeeper_path + "/repl_data_paths", "");
zk->createIfNotExists(zookeeper_path + "/repl_access", "");
zk->createIfNotExists(zookeeper_path + "/file_names", "");
zk->createIfNotExists(zookeeper_path + "/file_infos", "");
zk->createIfNotExists(zookeeper_path + "/archive_suffixes", "");
}
void BackupCoordinationRemote::removeAllNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->removeRecursive(zookeeper_path);
/// Usually this function is called by the initiator when a backup is complete so we don't need the coordination anymore.
///
/// However there can be a rare situation when this function is called after an error occurs on the initiator of a query
/// while some hosts are still making the backup. Removing all the nodes will remove the parent node of the backup coordination
/// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some useless part
/// of their backup work before that. Anyway in this case backup won't be finalized (because only an initiator can do that).
auto zk = getZooKeeper();
zk->removeRecursive(zookeeper_path);
}
void BackupCoordinationRemote::setStatus(const String & current_host, const String & new_status, const String & message)
void BackupCoordinationRemote::setStage(const String & current_host, const String & new_stage, const String & message)
{
status_sync.set(current_host, new_status, message);
stage_sync->set(current_host, new_stage, message);
}
void BackupCoordinationRemote::setErrorStatus(const String & current_host, const Exception & exception)
void BackupCoordinationRemote::setError(const String & current_host, const Exception & exception)
{
status_sync.setError(current_host, exception);
stage_sync->setError(current_host, exception);
}
Strings BackupCoordinationRemote::waitStatus(const Strings & all_hosts, const String & status_to_wait)
Strings BackupCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait)
{
return status_sync.wait(all_hosts, status_to_wait);
return stage_sync->wait(all_hosts, stage_to_wait);
}
Strings BackupCoordinationRemote::waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms)
Strings BackupCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return status_sync.waitFor(all_hosts, status_to_wait, timeout_ms);
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
}
@ -229,11 +268,11 @@ void BackupCoordinationRemote::addReplicatedPartNames(
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedPartNames() must not be called after preparing");
}
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_shared_id);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(replica_name);
zookeeper->create(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs), zkutil::CreateMode::Persistent);
zk->create(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs), zkutil::CreateMode::Persistent);
}
Strings BackupCoordinationRemote::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const
@ -255,11 +294,11 @@ void BackupCoordinationRemote::addReplicatedMutations(
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedMutations() must not be called after preparing");
}
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_mutations/" + escapeForFileName(table_shared_id);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(replica_name);
zookeeper->create(path, ReplicatedMutations::serialize(mutations, table_name_for_logs), zkutil::CreateMode::Persistent);
zk->create(path, ReplicatedMutations::serialize(mutations, table_name_for_logs), zkutil::CreateMode::Persistent);
}
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationRemote::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const
@ -279,11 +318,11 @@ void BackupCoordinationRemote::addReplicatedDataPath(
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedDataPath() must not be called after preparing");
}
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_shared_id);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(data_path);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
}
Strings BackupCoordinationRemote::getReplicatedDataPaths(const String & table_shared_id) const
@ -300,18 +339,18 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
return;
replicated_tables.emplace();
auto zookeeper = get_zookeeper();
auto zk = getZooKeeperNoLock();
{
String path = zookeeper_path + "/repl_part_names";
for (const String & escaped_table_shared_id : zookeeper->getChildren(path))
for (const String & escaped_table_shared_id : zk->getChildren(path))
{
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
String path2 = path + "/" + escaped_table_shared_id;
for (const String & escaped_replica_name : zookeeper->getChildren(path2))
for (const String & escaped_replica_name : zk->getChildren(path2))
{
String replica_name = unescapeForFileName(escaped_replica_name);
auto part_names = ReplicatedPartNames::deserialize(zookeeper->get(path2 + "/" + escaped_replica_name));
auto part_names = ReplicatedPartNames::deserialize(zk->get(path2 + "/" + escaped_replica_name));
replicated_tables->addPartNames(table_shared_id, part_names.table_name_for_logs, replica_name, part_names.part_names_and_checksums);
}
}
@ -319,14 +358,14 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
{
String path = zookeeper_path + "/repl_mutations";
for (const String & escaped_table_shared_id : zookeeper->getChildren(path))
for (const String & escaped_table_shared_id : zk->getChildren(path))
{
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
String path2 = path + "/" + escaped_table_shared_id;
for (const String & escaped_replica_name : zookeeper->getChildren(path2))
for (const String & escaped_replica_name : zk->getChildren(path2))
{
String replica_name = unescapeForFileName(escaped_replica_name);
auto mutations = ReplicatedMutations::deserialize(zookeeper->get(path2 + "/" + escaped_replica_name));
auto mutations = ReplicatedMutations::deserialize(zk->get(path2 + "/" + escaped_replica_name));
replicated_tables->addMutations(table_shared_id, mutations.table_name_for_logs, replica_name, mutations.mutations);
}
}
@ -334,11 +373,11 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
{
String path = zookeeper_path + "/repl_data_paths";
for (const String & escaped_table_shared_id : zookeeper->getChildren(path))
for (const String & escaped_table_shared_id : zk->getChildren(path))
{
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
String path2 = path + "/" + escaped_table_shared_id;
for (const String & escaped_data_path : zookeeper->getChildren(path2))
for (const String & escaped_data_path : zk->getChildren(path2))
{
String data_path = unescapeForFileName(escaped_data_path);
replicated_tables->addDataPath(table_shared_id, data_path);
@ -356,13 +395,13 @@ void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedAccessFilePath() must not be called after preparing");
}
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_access/" + escapeForFileName(access_zk_path);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + AccessEntityTypeInfo::get(access_entity_type).name;
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + host_id;
zookeeper->createIfNotExists(path, file_path);
zk->createIfNotExists(path, file_path);
}
Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const
@ -378,20 +417,20 @@ void BackupCoordinationRemote::prepareReplicatedAccess() const
return;
replicated_access.emplace();
auto zookeeper = get_zookeeper();
auto zk = getZooKeeperNoLock();
String path = zookeeper_path + "/repl_access";
for (const String & escaped_access_zk_path : zookeeper->getChildren(path))
for (const String & escaped_access_zk_path : zk->getChildren(path))
{
String access_zk_path = unescapeForFileName(escaped_access_zk_path);
String path2 = path + "/" + escaped_access_zk_path;
for (const String & type_str : zookeeper->getChildren(path2))
for (const String & type_str : zk->getChildren(path2))
{
AccessEntityType type = AccessEntityTypeInfo::parseType(type_str);
String path3 = path2 + "/" + type_str;
for (const String & host_id : zookeeper->getChildren(path3))
for (const String & host_id : zk->getChildren(path3))
{
String file_path = zookeeper->get(path3 + "/" + host_id);
String file_path = zk->get(path3 + "/" + host_id);
replicated_access->addFilePath(access_zk_path, type, host_id, file_path);
}
}
@ -401,11 +440,11 @@ void BackupCoordinationRemote::prepareReplicatedAccess() const
void BackupCoordinationRemote::addFileInfo(const FileInfo & file_info, bool & is_data_file_required)
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String full_path = zookeeper_path + "/file_names/" + escapeForFileName(file_info.file_name);
String size_and_checksum = serializeSizeAndChecksum(std::pair{file_info.size, file_info.checksum});
zookeeper->create(full_path, size_and_checksum, zkutil::CreateMode::Persistent);
zk->create(full_path, size_and_checksum, zkutil::CreateMode::Persistent);
if (!file_info.size)
{
@ -414,7 +453,7 @@ void BackupCoordinationRemote::addFileInfo(const FileInfo & file_info, bool & is
}
full_path = zookeeper_path + "/file_infos/" + size_and_checksum;
auto code = zookeeper->tryCreate(full_path, serializeFileInfo(file_info), zkutil::CreateMode::Persistent);
auto code = zk->tryCreate(full_path, serializeFileInfo(file_info), zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, full_path);
@ -426,15 +465,15 @@ void BackupCoordinationRemote::updateFileInfo(const FileInfo & file_info)
if (!file_info.size)
return; /// we don't keep FileInfos for empty files, nothing to update
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String size_and_checksum = serializeSizeAndChecksum(std::pair{file_info.size, file_info.checksum});
String full_path = zookeeper_path + "/file_infos/" + size_and_checksum;
for (size_t attempt = 0; attempt < NUM_ATTEMPTS; ++attempt)
{
Coordination::Stat stat;
auto new_info = deserializeFileInfo(zookeeper->get(full_path, &stat));
auto new_info = deserializeFileInfo(zk->get(full_path, &stat));
new_info.archive_suffix = file_info.archive_suffix;
auto code = zookeeper->trySet(full_path, serializeFileInfo(new_info), stat.version);
auto code = zk->trySet(full_path, serializeFileInfo(new_info), stat.version);
if (code == Coordination::Error::ZOK)
return;
bool is_last_attempt = (attempt == NUM_ATTEMPTS - 1);
@ -445,16 +484,16 @@ void BackupCoordinationRemote::updateFileInfo(const FileInfo & file_info)
std::vector<FileInfo> BackupCoordinationRemote::getAllFileInfos() const
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
std::vector<FileInfo> file_infos;
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
Strings escaped_names = zk->getChildren(zookeeper_path + "/file_names");
for (const String & escaped_name : escaped_names)
{
String size_and_checksum = zookeeper->get(zookeeper_path + "/file_names/" + escaped_name);
String size_and_checksum = zk->get(zookeeper_path + "/file_names/" + escaped_name);
UInt64 size = deserializeSizeAndChecksum(size_and_checksum).first;
FileInfo file_info;
if (size) /// we don't keep FileInfos for empty files
file_info = deserializeFileInfo(zookeeper->get(zookeeper_path + "/file_infos/" + size_and_checksum));
file_info = deserializeFileInfo(zk->get(zookeeper_path + "/file_infos/" + size_and_checksum));
file_info.file_name = unescapeForFileName(escaped_name);
file_infos.emplace_back(std::move(file_info));
}
@ -463,8 +502,8 @@ std::vector<FileInfo> BackupCoordinationRemote::getAllFileInfos() const
Strings BackupCoordinationRemote::listFiles(const String & directory, bool recursive) const
{
auto zookeeper = get_zookeeper();
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
auto zk = getZooKeeper();
Strings escaped_names = zk->getChildren(zookeeper_path + "/file_names");
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
@ -496,8 +535,8 @@ Strings BackupCoordinationRemote::listFiles(const String & directory, bool recur
bool BackupCoordinationRemote::hasFiles(const String & directory) const
{
auto zookeeper = get_zookeeper();
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
auto zk = getZooKeeper();
Strings escaped_names = zk->getChildren(zookeeper_path + "/file_names");
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
@ -515,42 +554,42 @@ bool BackupCoordinationRemote::hasFiles(const String & directory) const
/// Returns the info stored for the file `file_name`, or std::nullopt if there is no node for it under "/file_names".
/// The node name is the escaped file name and its value is the serialized (size, checksum) pair.
/// NOTE(review): this listing interleaves the pre-change (`zookeeper`) and post-change (`zk`) lines of a diff.
std::optional<FileInfo> BackupCoordinationRemote::getFileInfo(const String & file_name) const
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String size_and_checksum;
if (!zookeeper->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum))
if (!zk->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum))
return std::nullopt;
UInt64 size = deserializeSizeAndChecksum(size_and_checksum).first;
FileInfo file_info;
/// The full FileInfo is stored under "/file_infos/<size_and_checksum>" and only for non-empty files.
if (size) /// we don't keep FileInfos for empty files
file_info = deserializeFileInfo(zookeeper->get(zookeeper_path + "/file_infos/" + size_and_checksum));
file_info = deserializeFileInfo(zk->get(zookeeper_path + "/file_infos/" + size_and_checksum));
/// The file name is always taken from the argument.
file_info.file_name = file_name;
return file_info;
}
/// Returns the info stored under "/file_infos/<serialized size and checksum>",
/// or std::nullopt if there is no such node.
/// NOTE(review): this listing interleaves the pre-change (`zookeeper`) and post-change (`zk`) lines of a diff.
std::optional<FileInfo> BackupCoordinationRemote::getFileInfo(const SizeAndChecksum & size_and_checksum) const
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String file_info_str;
if (!zookeeper->tryGet(zookeeper_path + "/file_infos/" + serializeSizeAndChecksum(size_and_checksum), file_info_str))
if (!zk->tryGet(zookeeper_path + "/file_infos/" + serializeSizeAndChecksum(size_and_checksum), file_info_str))
return std::nullopt;
return deserializeFileInfo(file_info_str);
}
/// Returns the (size, checksum) pair stored in the node "/file_names/<escaped file name>",
/// or std::nullopt if there is no such node.
/// NOTE(review): this listing interleaves the pre-change (`zookeeper`) and post-change (`zk`) lines of a diff.
std::optional<SizeAndChecksum> BackupCoordinationRemote::getFileSizeAndChecksum(const String & file_name) const
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String size_and_checksum;
if (!zookeeper->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum))
if (!zk->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum))
return std::nullopt;
return deserializeSizeAndChecksum(size_and_checksum);
}
String BackupCoordinationRemote::getNextArchiveSuffix()
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/archive_suffixes/a";
String path_created;
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::PersistentSequential, path_created);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::PersistentSequential, path_created);
if (code != Coordination::Error::ZOK)
throw zkutil::KeeperException(code, path);
return formatArchiveSuffix(extractCounterFromSequentialNodeName(path_created));
@ -558,16 +597,11 @@ String BackupCoordinationRemote::getNextArchiveSuffix()
/// Lists the children of "/archive_suffixes" and converts each sequential node name
/// to an archive suffix (via the counter extracted from the node name).
/// NOTE(review): this listing interleaves the pre-change (`zookeeper`) and post-change (`zk`) lines of a diff.
Strings BackupCoordinationRemote::getAllArchiveSuffixes() const
{
auto zookeeper = get_zookeeper();
Strings node_names = zookeeper->getChildren(zookeeper_path + "/archive_suffixes");
auto zk = getZooKeeper();
Strings node_names = zk->getChildren(zookeeper_path + "/archive_suffixes");
/// The conversion is done in place, so the returned list reuses the storage of the node names.
for (auto & node_name : node_names)
node_name = formatArchiveSuffix(extractCounterFromSequentialNodeName(node_name));
return node_names;
}
/// Drops the coordination data; simply forwards to removeAllNodes().
void BackupCoordinationRemote::drop()
{
removeAllNodes();
}
}

View File

@ -3,7 +3,7 @@
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupCoordinationReplicatedAccess.h>
#include <Backups/BackupCoordinationReplicatedTables.h>
#include <Backups/BackupCoordinationStatusSync.h>
#include <Backups/BackupCoordinationStageSync.h>
namespace DB
@ -13,13 +13,13 @@ namespace DB
class BackupCoordinationRemote : public IBackupCoordination
{
public:
BackupCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_);
BackupCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_);
~BackupCoordinationRemote() override;
void setStatus(const String & current_host, const String & new_status, const String & message) override;
void setErrorStatus(const String & current_host, const Exception & exception) override;
Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) override;
Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) override;
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void addReplicatedPartNames(
const String & table_shared_id,
@ -56,9 +56,9 @@ public:
String getNextArchiveSuffix() override;
Strings getAllArchiveSuffixes() const override;
void drop() override;
private:
zkutil::ZooKeeperPtr getZooKeeper() const;
zkutil::ZooKeeperPtr getZooKeeperNoLock() const;
void createRootNodes();
void removeAllNodes();
void prepareReplicatedTables() const;
@ -66,10 +66,12 @@ private:
const String zookeeper_path;
const zkutil::GetZooKeeper get_zookeeper;
const bool remove_zk_nodes_in_destructor;
BackupCoordinationStatusSync status_sync;
std::optional<BackupCoordinationStageSync> stage_sync;
mutable std::mutex mutex;
mutable zkutil::ZooKeeperPtr zookeeper;
mutable std::optional<BackupCoordinationReplicatedTables> replicated_tables;
mutable std::optional<BackupCoordinationReplicatedAccess> replicated_access;
};

View File

@ -0,0 +1,13 @@
#include <Backups/BackupCoordinationStage.h>
#include <fmt/format.h>
namespace DB
{
/// Builds the name of the "gathering metadata" stage for a given pass:
/// the base stage name followed by the pass number in parentheses.
String BackupCoordinationStage::formatGatheringMetadata(size_t pass)
{
    String stage_name = fmt::format("{} ({})", GATHERING_METADATA, pass);
    return stage_name;
}
}

View File

@ -0,0 +1,41 @@
#pragma once
#include <base/types.h>
namespace DB
{
/// Names of the stages which hosts go through while making a backup or restoring from it.
/// The names are plain strings.
namespace BackupCoordinationStage
{
/// Finding all tables and databases which we're going to put to the backup and collecting their metadata.
constexpr const char * GATHERING_METADATA = "gathering metadata";
/// Returns the name of the "gathering metadata" stage for a specific pass,
/// i.e. GATHERING_METADATA followed by the pass number in parentheses.
String formatGatheringMetadata(size_t pass);
/// Making temporary hard links and preparing backup entries.
constexpr const char * EXTRACTING_DATA_FROM_TABLES = "extracting data from tables";
/// Running special tasks for replicated tables which can also prepare some backup entries.
constexpr const char * RUNNING_POST_TASKS = "running post-tasks";
/// Writing backup entries to the backup and removing temporary hard links.
constexpr const char * WRITING_BACKUP = "writing backup";
/// Finding databases and tables in the backup which we're going to restore.
constexpr const char * FINDING_TABLES_IN_BACKUP = "finding tables in backup";
/// Creating databases or finding them and checking their definitions.
constexpr const char * CREATING_DATABASES = "creating databases";
/// Creating tables or finding them and checking their definition.
constexpr const char * CREATING_TABLES = "creating tables";
/// Inserting restored data to tables.
constexpr const char * INSERTING_DATA_TO_TABLES = "inserting data to tables";
/// Coordination stage meaning that a host finished its work.
constexpr const char * COMPLETED = "completed";
}
}

View File

@ -0,0 +1,201 @@
#include <Backups/BackupCoordinationStageSync.h>
#include <Common/Exception.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <base/chrono_io.h>
namespace DB
{
namespace ErrorCodes
{
extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
}
BackupCoordinationStageSync::BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, log(log_)
{
createRootNodes();
}
void BackupCoordinationStageSync::createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
}
void BackupCoordinationStageSync::set(const String & current_host, const String & new_stage, const String & message)
{
auto zookeeper = get_zookeeper();
/// Make an ephemeral node so the initiator can track if the current host is still working.
String alive_node_path = zookeeper_path + "/alive|" + current_host;
auto code = zookeeper->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNODEEXISTS)
throw zkutil::KeeperException(code, alive_node_path);
zookeeper->createIfNotExists(zookeeper_path + "/started|" + current_host, "");
zookeeper->create(zookeeper_path + "/current|" + current_host + "|" + new_stage, message, zkutil::CreateMode::Persistent);
}
void BackupCoordinationStageSync::setError(const String & current_host, const Exception & exception)
{
auto zookeeper = get_zookeeper();
WriteBufferFromOwnString buf;
writeStringBinary(current_host, buf);
writeException(exception, buf, true);
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
}
/// Waits without a time limit until all hosts come to the stage `stage_to_wait`.
Strings BackupCoordinationStageSync::wait(const Strings & all_hosts, const String & stage_to_wait)
{
    /// No timeout.
    return waitImpl(all_hosts, stage_to_wait, std::nullopt);
}
/// Waits until all hosts come to the stage `stage_to_wait`, but no longer than `timeout`.
Strings BackupCoordinationStageSync::waitFor(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
{
    std::optional<std::chrono::milliseconds> time_limit{timeout};
    return waitImpl(all_hosts, stage_to_wait, time_limit);
}
namespace
{
    /// What is known about a host which hasn't come to the awaited stage yet.
    struct UnreadyHostState
    {
        /// Whether the node "started|<host>" exists.
        bool started{false};

        /// Whether the node "alive|<host>" exists.
        bool alive{false};
    };
}
/// A snapshot of the coordination nodes as seen by readCurrentState().
struct BackupCoordinationStageSync::State
{
/// Values of the stage nodes of all hosts; filled only if every host has come to the awaited stage.
Strings results;
/// Hosts which haven't come to the awaited stage yet.
std::map<String, UnreadyHostState> unready_hosts;
/// Host name and exception read from the node "error", if that node exists.
std::optional<std::pair<String, Exception>> error;
/// The first unready host found which has started but has no "alive" node.
std::optional<String> host_terminated;
};
/// Analyzes the list of child nodes `zk_nodes` and finds out whether an error was reported,
/// which hosts haven't come to `stage_to_wait` yet, and (if every host is ready) what messages the hosts have set.
BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState(
    zkutil::ZooKeeperPtr zookeeper, const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const
{
    /// Views into `zk_nodes` for quick lookups by node name.
    std::unordered_set<std::string_view> existing_nodes{zk_nodes.begin(), zk_nodes.end()};
    auto node_exists = [&existing_nodes](std::string_view node_name) { return existing_nodes.contains(node_name); };

    State current;

    /// A reported error takes priority: nothing else is analyzed in that case.
    if (node_exists("error"))
    {
        ReadBufferFromOwnString in{zookeeper->get(zookeeper_path + "/error")};
        String failed_host;
        readStringBinary(failed_host, in);
        current.error = std::make_pair(failed_host, readException(in, fmt::format("Got error from {}", failed_host)));
        return current;
    }

    for (const auto & host : all_hosts)
    {
        if (node_exists("current|" + host + "|" + stage_to_wait))
            continue; /// This host has already come to the stage.

        UnreadyHostState host_state;
        host_state.started = node_exists("started|" + host);
        host_state.alive = node_exists("alive|" + host);
        current.unready_hosts.emplace(host, host_state);

        /// Started but no longer alive: the host stopped working. Only the first such host is remembered.
        const bool stopped_working = host_state.started && !host_state.alive;
        if (stopped_working && !current.host_terminated)
            current.host_terminated = host;
    }

    if (current.host_terminated || !current.unready_hosts.empty())
        return current;

    /// Every host is ready: collect their messages in the order of `all_hosts`.
    current.results.reserve(all_hosts.size());
    for (const auto & host : all_hosts)
    {
        const String stage_node_path = zookeeper_path + "/current|" + host + "|" + stage_to_wait;
        current.results.emplace_back(zookeeper->get(stage_node_path));
    }
    return current;
}
/// Waits until all hosts from `all_hosts` come to the stage `stage_to_wait`.
/// If `timeout` is set the function stops waiting after that time.
/// Returns the values produced by readCurrentState() (State::results), or an empty list if `all_hosts` is empty.
/// Rethrows an error published by any host; throws FAILED_TO_SYNC_BACKUP_OR_RESTORE
/// if a host stopped working or if the time is out.
Strings BackupCoordinationStageSync::waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const
{
if (all_hosts.empty())
return {};
/// Wait until all hosts are ready or an error happens or time is out.
auto zookeeper = get_zookeeper();
/// Set by ZooKeeper when the list of zk nodes has changed.
auto watch = std::make_shared<Poco::Event>();
bool use_timeout = timeout.has_value();
std::chrono::steady_clock::time_point end_of_timeout;
if (use_timeout)
end_of_timeout = std::chrono::steady_clock::now() + std::chrono::duration_cast<std::chrono::steady_clock::duration>(*timeout);
State state;
String previous_unready_host; /// Used for logging: we don't want to log the same unready host again.
for (;;)
{
/// Get zk nodes and subscribe on their changes.
/// The subscription is made before analyzing the nodes, and it's renewed on each iteration.
Strings zk_nodes = zookeeper->getChildren(zookeeper_path, nullptr, watch);
/// Read and analyze the current state of zk nodes.
state = readCurrentState(zookeeper, zk_nodes, all_hosts, stage_to_wait);
if (state.error || state.host_terminated || state.unready_hosts.empty())
break; /// An error happened, or a host stopped working, or everything is ready.
/// Log that we will wait for another host.
const auto & unready_host = state.unready_hosts.begin()->first;
if (unready_host != previous_unready_host)
{
LOG_TRACE(log, "Waiting for host {}", unready_host);
previous_unready_host = unready_host;
}
/// Wait until the event `watch` is set by ZooKeeper meaning that zk nodes have changed.
{
if (use_timeout)
{
/// Wait only for the rest of the timeout; leave the loop if the time is out.
auto current_time = std::chrono::steady_clock::now();
if ((current_time > end_of_timeout)
|| !watch->tryWait(std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - current_time).count()))
break;
}
else
{
watch->wait();
}
}
}
/// Rethrow an error raised originally on another host.
if (state.error)
state.error->second.rethrow();
/// Another host terminated without errors.
if (state.host_terminated)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Host {} suddenly stopped working", *state.host_terminated);
/// Something's unready, timeout is probably not enough.
/// (The loop can be left with unready hosts only in the branch with a timeout, so `*timeout` is valid here.)
if (!state.unready_hosts.empty())
{
const auto & [unready_host, unready_host_state] = *state.unready_hosts.begin();
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Waited for host {} too long (> {}){}",
unready_host,
to_string(*timeout),
unready_host_state.started ? "" : ": Operation didn't start");
}
return state.results;
}
}

View File

@ -0,0 +1,39 @@
#pragma once
#include <Common/ZooKeeper/Common.h>
namespace DB
{
/// Used to coordinate hosts so all hosts would come to a specific stage at around the same time.
class BackupCoordinationStageSync
{
public:
/// Creates the node `zookeeper_path_` (and its ancestors) if it doesn't exist.
BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_);
/// Sets the stage of the current host and signals other hosts if there were other hosts waiting for that.
/// `message` is what other hosts will get from wait() or waitFor() for this host.
void set(const String & current_host, const String & new_stage, const String & message);
/// Publishes an error raised on the current host; hosts waiting in wait() or waitFor() will rethrow it.
void setError(const String & current_host, const Exception & exception);
/// Waits until all hosts come to the specified stage (this function doesn't set the stage of the current host).
/// The function returns the messages all hosts set when they come to the required stage.
/// Throws an exception if some host published an error or stopped working.
Strings wait(const Strings & all_hosts, const String & stage_to_wait);
/// Almost the same as wait() but this one stops waiting and throws an exception after a specific amount of time.
Strings waitFor(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout);
private:
void createRootNodes();
/// The state of the coordination nodes read by readCurrentState().
struct State;
State readCurrentState(zkutil::ZooKeeperPtr zookeeper, const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const;
/// Common implementation of wait() and waitFor(); an empty `timeout` means no time limit.
Strings waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const;
String zookeeper_path;
zkutil::GetZooKeeper get_zookeeper;
Poco::Logger * log;
};
}

View File

@ -1,182 +0,0 @@
#include <Backups/BackupCoordinationStatusSync.h>
#include <Common/Exception.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <base/chrono_io.h>
namespace DB
{
namespace ErrorCodes
{
extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
}
BackupCoordinationStatusSync::BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, log(log_)
{
createRootNodes();
}
void BackupCoordinationStatusSync::createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
}
void BackupCoordinationStatusSync::set(const String & current_host, const String & new_status, const String & message)
{
auto zookeeper = get_zookeeper();
zookeeper->createIfNotExists(zookeeper_path + "/" + current_host + "|" + new_status, message);
}
void BackupCoordinationStatusSync::setError(const String & current_host, const Exception & exception)
{
auto zookeeper = get_zookeeper();
Exception exception2 = exception;
exception2.addMessage("Host {}", current_host);
WriteBufferFromOwnString buf;
writeException(exception2, buf, true);
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
}
/// Waits without a time limit until all hosts come to the status `status_to_wait`.
Strings BackupCoordinationStatusSync::wait(const Strings & all_hosts, const String & status_to_wait)
{
    /// No timeout.
    return waitImpl(all_hosts, status_to_wait, std::nullopt);
}
/// Waits until all hosts come to the status `status_to_wait`, but no longer than `timeout_ms` milliseconds.
Strings BackupCoordinationStatusSync::waitFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms)
{
    std::optional<UInt64> time_limit_ms{timeout_ms};
    return waitImpl(all_hosts, status_to_wait, time_limit_ms);
}
/// Waits until every host from `all_hosts` has a node "<host>|<status_to_wait>".
/// If `timeout_ms` is set the function stops waiting after that time.
/// Returns the values of those nodes in the order of `all_hosts` (an empty list if `all_hosts` is empty).
/// Rethrows an error published in the node "error"; throws FAILED_TO_SYNC_BACKUP_OR_RESTORE
/// if an unexpected node is found or if the time is out.
Strings BackupCoordinationStatusSync::waitImpl(const Strings & all_hosts, const String & status_to_wait, std::optional<UInt64> timeout_ms)
{
    if (all_hosts.empty())
        return {};

    /// Wait for other hosts.
    Strings ready_hosts_results;
    ready_hosts_results.resize(all_hosts.size());

    /// The same host can be listed more than once, so each host keeps all its positions.
    std::map<String, std::vector<size_t> /* index in `ready_hosts_results` */> unready_hosts;
    for (size_t i = 0; i != all_hosts.size(); ++i)
        unready_hosts[all_hosts[i]].push_back(i);

    std::optional<Exception> error;

    auto zookeeper = get_zookeeper();

    /// Process ZooKeeper's nodes and update `ready_hosts_results`, `unready_hosts` and `error`.
    auto process_zk_nodes = [&](const Strings & zk_nodes)
    {
        for (const String & zk_node : zk_nodes)
        {
            /// Such nodes are created below only to trigger the watch.
            if (zk_node.starts_with("remove_watch-"))
                continue;

            if (zk_node == "error")
            {
                ReadBufferFromOwnString buf{zookeeper->get(zookeeper_path + "/error")};
                error = readException(buf, "", true);
                break;
            }

            size_t separator_pos = zk_node.find('|');
            if (separator_pos == String::npos)
                throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected zk node {}", zookeeper_path + "/" + zk_node);

            String host = zk_node.substr(0, separator_pos);
            String status = zk_node.substr(separator_pos + 1);

            auto it = unready_hosts.find(host);
            if ((it != unready_hosts.end()) && (status == status_to_wait))
            {
                String result = zookeeper->get(zookeeper_path + "/" + zk_node);
                for (size_t i : it->second)
                    ready_hosts_results[i] = result;
                unready_hosts.erase(it);
            }
        }
    };

    /// Wait until all hosts are ready or an error happens or time is out.
    /// `watch_mutex` protects changing `watch_set` by the callback: even though `watch_set` is atomic,
    /// it must be changed under the mutex used for waiting, otherwise the waiting thread can miss the notification
    /// (if it comes between checking the predicate and blocking on the condition variable).
    std::mutex watch_mutex;
    std::atomic<bool> watch_set = false;
    std::condition_variable watch_triggered_event;

    auto watch_callback = [&](const Coordination::WatchResponse &)
    {
        /// The notification is also sent under the mutex, so this function can't return
        /// (and destroy the mutex and the condition variable) while the callback is still using them.
        std::lock_guard lock{watch_mutex};
        watch_set = false; /// After it's triggered it's not set until we call getChildrenWatch() again.
        watch_triggered_event.notify_all();
    };

    auto watch_triggered = [&] { return !watch_set; };

    bool use_timeout = timeout_ms.has_value();
    std::chrono::milliseconds timeout{timeout_ms.value_or(0)};
    std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();
    std::chrono::steady_clock::duration elapsed;
    String previous_unready_host; /// Used for logging: we don't want to log the same unready host again.

    while (!unready_hosts.empty() && !error)
    {
        watch_set = true;
        Strings nodes = zookeeper->getChildrenWatch(zookeeper_path, nullptr, watch_callback);
        process_zk_nodes(nodes);

        if (!unready_hosts.empty() && !error)
        {
            const auto & unready_host = unready_hosts.begin()->first;
            if (unready_host != previous_unready_host)
            {
                LOG_TRACE(log, "Waiting for host {}", unready_host);
                previous_unready_host = unready_host;
            }

            std::unique_lock lock{watch_mutex};
            if (use_timeout)
            {
                elapsed = std::chrono::steady_clock::now() - start_time;
                if ((elapsed > timeout) || !watch_triggered_event.wait_for(lock, timeout - elapsed, watch_triggered))
                    break;
            }
            else
                watch_triggered_event.wait(lock, watch_triggered);
        }
    }

    if (watch_set)
    {
        /// Remove watch by triggering it.
        /// The mutex isn't held here so the callback can't be blocked by us while we're waiting for ZooKeeper.
        zookeeper->create(zookeeper_path + "/remove_watch-", "", zkutil::CreateMode::EphemeralSequential);
    }

    {
        /// Wait until the callback is done with the local variables it captured by reference.
        /// If the watch isn't set and the callback isn't running this returns immediately.
        std::unique_lock lock{watch_mutex};
        watch_triggered_event.wait(lock, watch_triggered);
    }

    if (error)
        error->rethrow();

    /// Some hosts can still be unready here only if the time is out.
    if (!unready_hosts.empty())
    {
        throw Exception(
            ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
            "Waited for host {} too long ({})",
            unready_hosts.begin()->first,
            to_string(elapsed));
    }

    return ready_hosts_results;
}
}

View File

@ -1,37 +0,0 @@
#pragma once
#include <Common/ZooKeeper/Common.h>
namespace DB
{
/// Used to coordinate hosts so all hosts would come to a specific status at around the same time.
class BackupCoordinationStatusSync
{
public:
/// Creates the node `zookeeper_path_` (and its ancestors) if it doesn't exist.
BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_);
/// Sets the status of the current host and signals other hosts if there were other hosts waiting for that.
/// `message` is what other hosts will get from wait() or waitFor() for this host.
void set(const String & current_host, const String & new_status, const String & message);
/// Publishes an error raised on the current host; hosts waiting in wait() or waitFor() will rethrow it.
void setError(const String & current_host, const Exception & exception);
/// Waits until all hosts come to the specified status (this function doesn't set the status of the current host).
/// The function returns the messages all hosts set when they come to the required status.
Strings wait(const Strings & all_hosts, const String & status_to_wait);
/// Almost the same as wait() but this one stops waiting and throws an exception after a specific amount of time.
Strings waitFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms);
/// The name of the node which is used to publish an error.
static constexpr const char * kErrorStatus = "error";
private:
void createRootNodes();
/// Common implementation of wait() and waitFor(); an empty `timeout_ms` means no time limit.
Strings waitImpl(const Strings & all_hosts, const String & status_to_wait, std::optional<UInt64> timeout_ms);
String zookeeper_path;
zkutil::GetZooKeeper get_zookeeper;
Poco::Logger * log;
};
}

View File

@ -1,6 +1,7 @@
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupCoordinationStage.h>
#include <Backups/BackupUtils.h>
#include <Backups/DDLAdjustingForBackupVisitor.h>
#include <Databases/IDatabase.h>
@ -31,20 +32,11 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
namespace Stage = BackupCoordinationStage;
namespace
{
/// Finding all tables and databases which we're going to put to the backup and collecting their metadata.
constexpr const char * kGatheringMetadataStatus = "gathering metadata";
/// Making temporary hard links and prepare backup entries.
constexpr const char * kExtractingDataFromTablesStatus = "extracting data from tables";
/// Running special tasks for replicated tables which can also prepare some backup entries.
constexpr const char * kRunningPostTasksStatus = "running post-tasks";
/// Writing backup entries to the backup and removing temporary hard links.
constexpr const char * kWritingBackupStatus = "writing backup";
/// Uppercases the first character of a passed string.
String toUpperFirst(const String & str)
{
@ -90,7 +82,8 @@ BackupEntriesCollector::BackupEntriesCollector(
, backup_settings(backup_settings_)
, backup_coordination(backup_coordination_)
, context(context_)
, consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 300000))
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
, consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000))
, log(&Poco::Logger::get("BackupEntriesCollector"))
{
}
@ -100,7 +93,7 @@ BackupEntriesCollector::~BackupEntriesCollector() = default;
BackupEntries BackupEntriesCollector::run()
{
/// run() can be called onle once.
if (!current_status.empty())
if (!current_stage.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Already making backup entries");
/// Find other hosts working along with us to execute this ON CLUSTER query.
@ -123,36 +116,40 @@ BackupEntries BackupEntriesCollector::run()
makeBackupEntriesForTablesDefs();
/// Make backup entries for the data of the found tables.
setStatus(kExtractingDataFromTablesStatus);
setStage(Stage::EXTRACTING_DATA_FROM_TABLES);
makeBackupEntriesForTablesData();
/// Run all the tasks added with addPostCollectingTask().
setStatus(kRunningPostTasksStatus);
setStage(Stage::RUNNING_POST_TASKS);
runPostTasks();
/// No more backup entries or tasks are allowed after this point.
setStatus(kWritingBackupStatus);
setStage(Stage::WRITING_BACKUP);
return std::move(backup_entries);
}
Strings BackupEntriesCollector::setStatus(const String & new_status, const String & message)
Strings BackupEntriesCollector::setStage(const String & new_stage, const String & message)
{
LOG_TRACE(log, "{}", toUpperFirst(new_status));
current_status = new_status;
LOG_TRACE(log, "{}", toUpperFirst(new_stage));
current_stage = new_stage;
backup_coordination->setStatus(backup_settings.host_id, new_status, message);
backup_coordination->setStage(backup_settings.host_id, new_stage, message);
if (new_status.starts_with(kGatheringMetadataStatus))
if (new_stage == Stage::formatGatheringMetadata(1))
{
auto now = std::chrono::steady_clock::now();
auto end_of_timeout = std::max(now, consistent_metadata_snapshot_start_time + consistent_metadata_snapshot_timeout);
return backup_coordination->waitStatusFor(
all_hosts, new_status, std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - now).count());
return backup_coordination->waitForStage(all_hosts, new_stage, on_cluster_first_sync_timeout);
}
else if (new_stage.starts_with(Stage::GATHERING_METADATA))
{
auto current_time = std::chrono::steady_clock::now();
auto end_of_timeout = std::max(current_time, consistent_metadata_snapshot_end_time);
return backup_coordination->waitForStage(
all_hosts, new_stage, std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - current_time));
}
else
{
return backup_coordination->waitStatus(all_hosts, new_status);
return backup_coordination->waitForStage(all_hosts, new_stage);
}
}
@ -173,18 +170,18 @@ void BackupEntriesCollector::calculateRootPathInBackup()
/// Finds databases and tables which we will put to the backup.
void BackupEntriesCollector::gatherMetadataAndCheckConsistency()
{
consistent_metadata_snapshot_start_time = std::chrono::steady_clock::now();
auto end_of_timeout = consistent_metadata_snapshot_start_time + consistent_metadata_snapshot_timeout;
setStatus(fmt::format("{} ({})", kGatheringMetadataStatus, 1));
setStage(Stage::formatGatheringMetadata(1));
consistent_metadata_snapshot_end_time = std::chrono::steady_clock::now() + consistent_metadata_snapshot_timeout;
for (size_t pass = 1;; ++pass)
{
String new_status = fmt::format("{} ({})", kGatheringMetadataStatus, pass + 1);
String next_stage = Stage::formatGatheringMetadata(pass + 1);
std::optional<Exception> inconsistency_error;
if (tryGatherMetadataAndCompareWithPrevious(inconsistency_error))
{
/// Gathered metadata and checked consistency, cool! But we have to check that other hosts cope with that too.
auto all_hosts_results = setStatus(new_status, "consistent");
auto all_hosts_results = setStage(next_stage, "consistent");
std::optional<String> host_with_inconsistency;
std::optional<String> inconsistency_error_on_other_host;
@ -210,13 +207,13 @@ void BackupEntriesCollector::gatherMetadataAndCheckConsistency()
else
{
/// Failed to gather metadata or something wasn't consistent. We'll let other hosts know that and try again.
setStatus(new_status, inconsistency_error->displayText());
setStage(next_stage, inconsistency_error->displayText());
}
/// Two passes is the minimum (we need to compare table names with the previous ones to be sure we don't miss anything).
if (pass >= 2)
{
if (std::chrono::steady_clock::now() > end_of_timeout)
if (std::chrono::steady_clock::now() > consistent_metadata_snapshot_end_time)
inconsistency_error->rethrow();
else
LOG_WARNING(log, "{}", inconsistency_error->displayText());
@ -239,6 +236,7 @@ bool BackupEntriesCollector::tryGatherMetadataAndCompareWithPrevious(std::option
table_infos.clear();
gatherDatabasesMetadata();
gatherTablesMetadata();
lockTablesForReading();
}
catch (Exception & e)
{
@ -526,12 +524,11 @@ void BackupEntriesCollector::lockTablesForReading()
for (auto & [table_name, table_info] : table_infos)
{
auto storage = table_info.storage;
TableLockHolder table_lock;
if (storage)
{
try
{
table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
table_info.table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
}
catch (Exception & e)
{
@ -712,7 +709,7 @@ void BackupEntriesCollector::makeBackupEntriesForTableData(const QualifiedTableN
void BackupEntriesCollector::addBackupEntry(const String & file_name, BackupEntryPtr backup_entry)
{
if (current_status == kWritingBackupStatus)
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed");
backup_entries.emplace_back(file_name, backup_entry);
}
@ -724,21 +721,21 @@ void BackupEntriesCollector::addBackupEntry(const std::pair<String, BackupEntryP
void BackupEntriesCollector::addBackupEntries(const BackupEntries & backup_entries_)
{
if (current_status == kWritingBackupStatus)
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of backup entries is not allowed");
insertAtEnd(backup_entries, backup_entries_);
}
void BackupEntriesCollector::addBackupEntries(BackupEntries && backup_entries_)
{
if (current_status == kWritingBackupStatus)
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of backup entries is not allowed");
insertAtEnd(backup_entries, std::move(backup_entries_));
}
void BackupEntriesCollector::addPostTask(std::function<void()> task)
{
if (current_status == kWritingBackupStatus)
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of post tasks is not allowed");
post_tasks.push(std::move(task));
}

View File

@ -86,12 +86,13 @@ private:
void runPostTasks();
Strings setStatus(const String & new_status, const String & message = "");
Strings setStage(const String & new_stage, const String & message = "");
const ASTBackupQuery::Elements backup_query_elements;
const BackupSettings backup_settings;
std::shared_ptr<IBackupCoordination> backup_coordination;
ContextPtr context;
std::chrono::milliseconds on_cluster_first_sync_timeout;
std::chrono::milliseconds consistent_metadata_snapshot_timeout;
Poco::Logger * log;
@ -129,8 +130,8 @@ private:
std::optional<ASTs> partitions;
};
String current_status;
std::chrono::steady_clock::time_point consistent_metadata_snapshot_start_time;
String current_stage;
std::chrono::steady_clock::time_point consistent_metadata_snapshot_end_time;
std::unordered_map<String, DatabaseInfo> database_infos;
std::unordered_map<QualifiedTableName, TableInfo> table_infos;
std::vector<std::pair<String, String>> previous_databases_metadata;

View File

@ -8,21 +8,22 @@ class SeekableReadBuffer;
class WriteBuffer;
/// Represents operations of loading from disk or downloading for reading a backup.
class IBackupReader /// BackupReaderFile, BackupReaderDisk, BackupReaderS3
class IBackupReader /// BackupReaderFile, BackupReaderDisk
{
public:
virtual ~IBackupReader() = default;
virtual bool fileExists(const String & file_name) = 0;
virtual size_t getFileSize(const String & file_name) = 0;
virtual UInt64 getFileSize(const String & file_name) = 0;
virtual std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) = 0;
};
/// Represents operations of storing to disk or uploading for writing a backup.
class IBackupWriter /// BackupWriterFile, BackupWriterDisk, BackupWriterS3
class IBackupWriter /// BackupWriterFile, BackupWriterDisk
{
public:
virtual ~IBackupWriter() = default;
virtual bool fileExists(const String & file_name) = 0;
virtual UInt64 getFileSize(const String & file_name) = 0;
virtual bool fileContentsEqual(const String & file_name, const String & expected_file_contents) = 0;
virtual std::unique_ptr<WriteBuffer> writeFile(const String & file_name) = 0;
virtual void removeFiles(const Strings & file_names) = 0;

View File

@ -17,7 +17,7 @@ bool BackupReaderDisk::fileExists(const String & file_name)
return disk->exists(path / file_name);
}
size_t BackupReaderDisk::getFileSize(const String & file_name)
UInt64 BackupReaderDisk::getFileSize(const String & file_name)
{
return disk->getFileSize(path / file_name);
}
@ -38,6 +38,11 @@ bool BackupWriterDisk::fileExists(const String & file_name)
return disk->exists(path / file_name);
}
/// Returns the size that `disk` reports for `file_name` resolved against `path`.
UInt64 BackupWriterDisk::getFileSize(const String & file_name)
{
    const auto full_path = path / file_name;
    return disk->getFileSize(full_path);
}
bool BackupWriterDisk::fileContentsEqual(const String & file_name, const String & expected_file_contents)
{
if (!disk->exists(path / file_name))

View File

@ -15,7 +15,7 @@ public:
~BackupReaderDisk() override;
bool fileExists(const String & file_name) override;
size_t getFileSize(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
private:
@ -30,6 +30,7 @@ public:
~BackupWriterDisk() override;
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;
std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;

View File

@ -18,7 +18,7 @@ bool BackupReaderFile::fileExists(const String & file_name)
return fs::exists(path / file_name);
}
size_t BackupReaderFile::getFileSize(const String & file_name)
UInt64 BackupReaderFile::getFileSize(const String & file_name)
{
return fs::file_size(path / file_name);
}
@ -39,6 +39,11 @@ bool BackupWriterFile::fileExists(const String & file_name)
return fs::exists(path / file_name);
}
/// Returns the size of `file_name` resolved against `path`, as reported by fs::file_size().
UInt64 BackupWriterFile::getFileSize(const String & file_name)
{
    const auto full_path = path / file_name;
    return fs::file_size(full_path);
}
bool BackupWriterFile::fileContentsEqual(const String & file_name, const String & expected_file_contents)
{
if (!fs::exists(path / file_name))

View File

@ -13,7 +13,7 @@ public:
~BackupReaderFile() override;
bool fileExists(const String & file_name) override;
size_t getFileSize(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
private:
@ -27,6 +27,7 @@ public:
~BackupWriterFile() override;
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;
std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;

View File

@ -219,10 +219,7 @@ void BackupImpl::open(const ContextPtr & context)
void BackupImpl::close()
{
std::lock_guard lock{mutex};
archive_readers.clear();
for (auto & archive_writer : archive_writers)
archive_writer = {"", nullptr};
closeArchives();
if (!is_internal_backup && writer && !writing_finalized)
removeAllFilesAfterFailure();
@ -232,10 +229,29 @@ void BackupImpl::close()
coordination.reset();
}
time_t BackupImpl::getTimestamp() const
void BackupImpl::closeArchives()
{
archive_readers.clear();
for (auto & archive_writer : archive_writers)
archive_writer = {"", nullptr};
}
size_t BackupImpl::getNumFiles() const
{
std::lock_guard lock{mutex};
return timestamp;
return num_files;
}
/// Returns the current value of `uncompressed_size`; the value is read while `mutex` is held.
UInt64 BackupImpl::getUncompressedSize() const
{
    std::lock_guard guard{mutex};
    const UInt64 result = uncompressed_size;
    return result;
}
/// Returns the current value of `compressed_size`; the value is read while `mutex` is held.
UInt64 BackupImpl::getCompressedSize() const
{
    std::lock_guard guard{mutex};
    const UInt64 result = compressed_size;
    return result;
}
void BackupImpl::writeBackupMetadata()
@ -290,6 +306,7 @@ void BackupImpl::writeBackupMetadata()
if (info.pos_in_archive != static_cast<size_t>(-1))
config->setUInt64(prefix + "pos_in_archive", info.pos_in_archive);
}
increaseUncompressedSize(info);
++index;
}
@ -306,6 +323,8 @@ void BackupImpl::writeBackupMetadata()
out = writer->writeFile(".backup");
out->write(str.data(), str.size());
out->finalize();
increaseUncompressedSize(str.size());
}
void BackupImpl::readBackupMetadata()
@ -315,6 +334,7 @@ void BackupImpl::readBackupMetadata()
{
if (!reader->fileExists(archive_params.archive_name))
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name);
setCompressedSize();
in = getArchiveReader("")->readFile(".backup");
}
else
@ -326,6 +346,7 @@ void BackupImpl::readBackupMetadata()
String str;
readStringUntilEOF(str, *in);
increaseUncompressedSize(str.size());
std::istringstream stream(str); // STYLE_CHECK_ALLOW_STD_STRING_STREAM
Poco::AutoPtr<Poco::Util::XMLConfiguration> config{new Poco::Util::XMLConfiguration()};
config->load(stream);
@ -382,8 +403,12 @@ void BackupImpl::readBackupMetadata()
}
coordination->addFileInfo(info);
increaseUncompressedSize(info);
}
}
if (!use_archives)
setCompressedSize();
}
void BackupImpl::checkBackupDoesntExist() const
@ -750,6 +775,8 @@ void BackupImpl::finalizeWriting()
{
LOG_TRACE(log, "Finalizing backup {}", backup_name);
writeBackupMetadata();
closeArchives();
setCompressedSize();
removeLockFile();
LOG_TRACE(log, "Finalized backup {}", backup_name);
}
@ -758,12 +785,32 @@ void BackupImpl::finalizeWriting()
}
/// Accounts for one more file: bumps `num_files` by 1 and adds `file_size` to `uncompressed_size`.
void BackupImpl::increaseUncompressedSize(UInt64 file_size)
{
    num_files += 1;
    uncompressed_size = uncompressed_size + file_size;
}
/// Accounts for the file described by `info`, but only if its `size` exceeds its `base_size`
/// and its `data_file_name` is either empty or equal to its own `file_name`.
/// The amount added is the difference `size - base_size`.
void BackupImpl::increaseUncompressedSize(const FileInfo & info)
{
    if (!(info.size > info.base_size))
        return;

    const bool data_file_name_is_empty = info.data_file_name.empty();
    if (!data_file_name_is_empty && !(info.data_file_name == info.file_name))
        return;

    increaseUncompressedSize(info.size - info.base_size);
}
/// Calculates and stores `compressed_size`.
/// Without archives it is simply `uncompressed_size`; with archives it is the size of the file
/// `archive_params.archive_name`, asked from `writer` if it is set and from `reader` otherwise.
void BackupImpl::setCompressedSize()
{
    if (!use_archives)
    {
        compressed_size = uncompressed_size;
        return;
    }

    if (writer)
        compressed_size = writer->getFileSize(archive_params.archive_name);
    else
        compressed_size = reader->getFileSize(archive_params.archive_name);
}
/// Builds an archive name from `archive_params.archive_name` and `suffix`.
/// An empty suffix gives the archive name unchanged; otherwise the result is "<archive_name>.<suffix>".
String BackupImpl::getArchiveNameWithSuffix(const String & suffix) const
{
    if (suffix.empty())
        return archive_params.archive_name;

    return archive_params.archive_name + "." + suffix;
}
std::shared_ptr<IArchiveReader> BackupImpl::getArchiveReader(const String & suffix) const
{
auto it = archive_readers.find(suffix);
@ -796,6 +843,7 @@ std::shared_ptr<IArchiveWriter> BackupImpl::getArchiveWriter(const String & suff
return new_archive_writer;
}
void BackupImpl::removeAllFilesAfterFailure()
{
if (is_internal_backup)

View File

@ -55,8 +55,11 @@ public:
const String & getName() const override { return backup_name; }
OpenMode getOpenMode() const override { return open_mode; }
time_t getTimestamp() const override;
time_t getTimestamp() const override { return timestamp; }
UUID getUUID() const override { return *uuid; }
size_t getNumFiles() const override;
UInt64 getUncompressedSize() const override;
UInt64 getCompressedSize() const override;
Strings listFiles(const String & directory, bool recursive) const override;
bool hasFiles(const String & directory) const override;
bool fileExists(const String & file_name) const override;
@ -76,6 +79,7 @@ private:
void open(const ContextPtr & context);
void close();
void closeArchives();
/// Writes the file ".backup" containing backup's metadata.
void writeBackupMetadata();
@ -96,6 +100,13 @@ private:
std::shared_ptr<IArchiveReader> getArchiveReader(const String & suffix) const;
std::shared_ptr<IArchiveWriter> getArchiveWriter(const String & suffix);
/// Increases `uncompressed_size` by a specific value and `num_files` by 1.
void increaseUncompressedSize(UInt64 file_size);
void increaseUncompressedSize(const FileInfo & info);
/// Calculates and sets `compressed_size`.
void setCompressedSize();
const String backup_name;
const ArchiveParams archive_params;
const bool use_archives;
@ -108,6 +119,9 @@ private:
mutable std::mutex mutex;
std::optional<UUID> uuid;
time_t timestamp = 0;
size_t num_files = 0;
UInt64 uncompressed_size = 0;
UInt64 compressed_size = 0;
UInt64 version;
std::optional<BackupInfo> base_backup_info;
std::shared_ptr<const IBackup> base_backup;

View File

@ -60,6 +60,7 @@ namespace
/// List of backup settings except base_backup_name and cluster_host_ids.
#define LIST_OF_BACKUP_SETTINGS(M) \
M(String, id) \
M(String, compression_method) \
M(Int64, compression_level) \
M(String, password) \

View File

@ -11,6 +11,9 @@ class ASTBackupQuery;
/// Settings specified in the "SETTINGS" clause of a BACKUP query.
struct BackupSettings
{
/// ID of the backup operation, to identify it in the system.backups table. Auto-generated if not set.
String id;
/// Base backup, if it's set an incremental backup will be built. That means only differences made after the base backup will be put
/// into a new backup.
std::optional<BackupInfo> base_backup_info;

View File

@ -15,18 +15,18 @@ std::string_view toString(BackupStatus backup_status)
{
switch (backup_status)
{
case BackupStatus::MAKING_BACKUP:
return "MAKING_BACKUP";
case BackupStatus::BACKUP_COMPLETE:
return "BACKUP_COMPLETE";
case BackupStatus::FAILED_TO_BACKUP:
return "FAILED_TO_BACKUP";
case BackupStatus::CREATING_BACKUP:
return "CREATING_BACKUP";
case BackupStatus::BACKUP_CREATED:
return "BACKUP_CREATED";
case BackupStatus::BACKUP_FAILED:
return "BACKUP_FAILED";
case BackupStatus::RESTORING:
return "RESTORING";
case BackupStatus::RESTORED:
return "RESTORED";
case BackupStatus::FAILED_TO_RESTORE:
return "FAILED_TO_RESTORE";
case BackupStatus::RESTORE_FAILED:
return "RESTORE_FAILED";
default:
break;
}

View File

@ -9,14 +9,14 @@ namespace DB
enum class BackupStatus
{
/// Statuses of making backups
MAKING_BACKUP,
BACKUP_COMPLETE,
FAILED_TO_BACKUP,
CREATING_BACKUP,
BACKUP_CREATED,
BACKUP_FAILED,
/// Status of restoring
RESTORING,
RESTORED,
FAILED_TO_RESTORE,
RESTORE_FAILED,
MAX,
};

View File

@ -5,6 +5,7 @@
#include <Backups/BackupUtils.h>
#include <Backups/IBackupEntry.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupCoordinationStage.h>
#include <Backups/BackupCoordinationRemote.h>
#include <Backups/BackupCoordinationLocal.h>
#include <Backups/RestoreCoordinationRemote.h>
@ -18,7 +19,6 @@
#include <Common/Exception.h>
#include <Common/Macros.h>
#include <Common/logger_useful.h>
#include <Common/scope_guard_safe.h>
#include <Common/setThreadName.h>
@ -27,28 +27,95 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
using OperationID = BackupsWorker::OperationID;
namespace Stage = BackupCoordinationStage;
namespace
{
/// Coordination status meaning that a host finished its work.
constexpr const char * kCompletedCoordinationStatus = "completed";
/// Sends information about the current exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendErrorToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host)
/// Creates a backup coordination object.
/// An empty `coordination_zk_path` gives a BackupCoordinationLocal; otherwise a BackupCoordinationRemote is created
/// for that path, with a callback which gets ZooKeeper from the global context and with `!is_internal_backup`
/// as its last constructor argument.
std::shared_ptr<IBackupCoordination> makeBackupCoordination(const String & coordination_zk_path, const ContextPtr & context, bool is_internal_backup)
{
    if (coordination_zk_path.empty())
        return std::make_shared<BackupCoordinationLocal>();

    auto zookeeper_getter = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
    return std::make_shared<BackupCoordinationRemote>(coordination_zk_path, zookeeper_getter, !is_internal_backup);
}
/// Creates a restore coordination object.
/// An empty `coordination_zk_path` gives a RestoreCoordinationLocal; otherwise a RestoreCoordinationRemote is created
/// for that path, with a callback which gets ZooKeeper from the global context and with `!is_internal_backup`
/// as its last constructor argument.
std::shared_ptr<IRestoreCoordination> makeRestoreCoordination(const String & coordination_zk_path, const ContextPtr & context, bool is_internal_backup)
{
    if (coordination_zk_path.empty())
        return std::make_shared<RestoreCoordinationLocal>();

    auto zookeeper_getter = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
    return std::make_shared<RestoreCoordinationRemote>(coordination_zk_path, zookeeper_getter, !is_internal_backup);
}
/// Sends information about an exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host, const Exception & exception)
{
if (!coordination)
return;
try
{
coordination->setErrorStatus(current_host, Exception{getCurrentExceptionCode(), getCurrentExceptionMessage(true, true)});
if (coordination)
coordination->setError(current_host, exception);
}
catch (...)
{
}
}
/// Sends information about the current exception to IBackupCoordination or IRestoreCoordination.
/// Must be called while an exception is being handled (i.e. from inside a `catch` block): the function rethrows
/// the current exception to find out whether it is an `Exception` or something else.
/// `coordination` may be null, in that case nothing is sent.
template <typename CoordinationType>
void sendCurrentExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host)
{
    try
    {
        throw;
    }
    catch (const Exception & e)
    {
        sendExceptionToCoordination(coordination, current_host, e);
    }
    catch (...)
    {
        /// Any other exception is wrapped into an `Exception` first.
        /// We go through sendExceptionToCoordination() here as well (instead of calling setError() directly)
        /// because it skips a null `coordination` and ignores failures of setError() itself;
        /// reporting an error must not crash or replace the error being reported.
        sendExceptionToCoordination(
            coordination, current_host, Exception{getCurrentExceptionCode(), getCurrentExceptionMessage(true, true)});
    }
}
/// Returns true for BACKUP_CREATED, BACKUP_FAILED, RESTORED and RESTORE_FAILED, and false for any other status.
bool isFinalStatus(BackupStatus status)
{
    switch (status)
    {
        case BackupStatus::BACKUP_CREATED:
        case BackupStatus::BACKUP_FAILED:
        case BackupStatus::RESTORED:
        case BackupStatus::RESTORE_FAILED:
            return true;
        default:
            return false;
    }
}
/// Returns true only for the two failure statuses: BACKUP_FAILED and RESTORE_FAILED.
bool isErrorStatus(BackupStatus status)
{
    const bool backup_failed = (status == BackupStatus::BACKUP_FAILED);
    const bool restore_failed = (status == BackupStatus::RESTORE_FAILED);
    return backup_failed || restore_failed;
}
/// Used to change num_active_backups.
/// Gives 1 for the status CREATING_BACKUP and 0 for every other status.
size_t getNumActiveBackupsChange(BackupStatus status)
{
    if (status == BackupStatus::CREATING_BACKUP)
        return 1;
    return 0;
}
/// Used to change num_active_restores.
/// Gives 1 for the status RESTORING and 0 for every other status.
size_t getNumActiveRestoresChange(BackupStatus status)
{
    if (status == BackupStatus::RESTORING)
        return 1;
    return 0;
}
}
@ -60,7 +127,8 @@ BackupsWorker::BackupsWorker(size_t num_backup_threads, size_t num_restore_threa
/// We set max_free_threads = 0 because we don't want to keep any threads if there is no BACKUP or RESTORE query running right now.
}
UUID BackupsWorker::start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context)
OperationID BackupsWorker::start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context)
{
const ASTBackupQuery & backup_query = typeid_cast<const ASTBackupQuery &>(*backup_or_restore_query);
if (backup_query.kind == ASTBackupQuery::Kind::BACKUP)
@ -70,379 +138,525 @@ UUID BackupsWorker::start(const ASTPtr & backup_or_restore_query, ContextMutable
}
UUID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & context)
OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & context)
{
auto backup_query = std::static_pointer_cast<ASTBackupQuery>(query->clone());
auto backup_settings = BackupSettings::fromBackupQuery(*backup_query);
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
bool on_cluster = !backup_query->cluster.empty();
if (!backup_settings.backup_uuid)
backup_settings.backup_uuid = UUIDHelpers::generateV4();
UUID backup_uuid = *backup_settings.backup_uuid;
/// Prepare context to use.
ContextPtr context_in_use = context;
ContextMutablePtr mutable_context;
if (on_cluster || backup_settings.async)
/// `backup_id` will be used as a key to the `infos` map, so it should be unique.
OperationID backup_id;
if (backup_settings.internal)
backup_id = "internal-" + toString(UUIDHelpers::generateV4()); /// Always generate `backup_id` for internal backup to avoid collision if both internal and non-internal backups are on the same host
else if (!backup_settings.id.empty())
backup_id = backup_settings.id;
else
backup_id = toString(*backup_settings.backup_uuid);
std::shared_ptr<IBackupCoordination> backup_coordination;
if (backup_settings.internal)
{
/// For ON CLUSTER queries we will need to change some settings.
/// For ASYNC queries we have to clone the context anyway.
context_in_use = mutable_context = Context::createCopy(context);
/// The following call of makeBackupCoordination() is not essential because doBackup() will later create a backup coordination
/// if it's not created here. However, for better error handling it's preferable to make the coordination here: this way,
/// if an exception is thrown in startMakingBackup(), other hosts will know about it.
backup_coordination = makeBackupCoordination(backup_settings.coordination_zk_path, context, backup_settings.internal);
}
addInfo(backup_uuid, backup_info.toString(), BackupStatus::MAKING_BACKUP, backup_settings.internal);
try
{
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
addInfo(backup_id, backup_info.toString(), backup_settings.internal, BackupStatus::CREATING_BACKUP);
auto job = [this,
backup_uuid,
/// Prepare context to use.
ContextPtr context_in_use = context;
ContextMutablePtr mutable_context;
bool on_cluster = !backup_query->cluster.empty();
if (on_cluster || backup_settings.async)
{
/// For ON CLUSTER queries we will need to change some settings.
/// For ASYNC queries we have to clone the context anyway.
context_in_use = mutable_context = Context::createCopy(context);
}
if (backup_settings.async)
{
backups_thread_pool.scheduleOrThrowOnError(
[this, backup_query, backup_id, backup_settings, backup_info, backup_coordination, context_in_use, mutable_context]
{
doBackup(
backup_query,
backup_id,
backup_settings,
backup_info,
backup_coordination,
context_in_use,
mutable_context,
/* called_async= */ true);
});
}
else
{
doBackup(
backup_query,
backup_id,
backup_settings,
backup_info,
on_cluster,
backup_coordination,
context_in_use,
mutable_context](bool async) mutable
mutable_context,
/* called_async= */ false);
}
return backup_id;
}
catch (...)
{
std::optional<CurrentThread::QueryScope> query_scope;
std::shared_ptr<IBackupCoordination> backup_coordination;
SCOPE_EXIT_SAFE(if (backup_coordination && !backup_settings.internal) backup_coordination->drop(););
try
{
if (async)
{
query_scope.emplace(mutable_context);
setThreadName("BackupWorker");
}
/// Checks access rights if this is not ON CLUSTER query.
/// (If this is ON CLUSTER query executeDDLQueryOnCluster() will check access rights later.)
auto required_access = getRequiredAccessToBackup(backup_query->elements);
if (!on_cluster)
context_in_use->checkAccess(required_access);
ClusterPtr cluster;
if (on_cluster)
{
backup_query->cluster = context_in_use->getMacros()->expand(backup_query->cluster);
cluster = context_in_use->getCluster(backup_query->cluster);
backup_settings.cluster_host_ids = cluster->getHostIDs();
if (backup_settings.coordination_zk_path.empty())
{
String root_zk_path = context_in_use->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_settings.coordination_zk_path = root_zk_path + "/backup-" + toString(backup_uuid);
}
}
/// Make a backup coordination.
if (!backup_settings.coordination_zk_path.empty())
{
backup_coordination = std::make_shared<BackupCoordinationRemote>(
backup_settings.coordination_zk_path,
[global_context = context_in_use->getGlobalContext()] { return global_context->getZooKeeper(); });
}
else
{
backup_coordination = std::make_shared<BackupCoordinationLocal>();
}
/// Opens a backup for writing.
BackupFactory::CreateParams backup_create_params;
backup_create_params.open_mode = IBackup::OpenMode::WRITE;
backup_create_params.context = context_in_use;
backup_create_params.backup_info = backup_info;
backup_create_params.base_backup_info = backup_settings.base_backup_info;
backup_create_params.compression_method = backup_settings.compression_method;
backup_create_params.compression_level = backup_settings.compression_level;
backup_create_params.password = backup_settings.password;
backup_create_params.is_internal_backup = backup_settings.internal;
backup_create_params.backup_coordination = backup_coordination;
backup_create_params.backup_uuid = backup_uuid;
BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params);
/// Write the backup.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = backup_settings.shard_num;
params.only_replica_num = backup_settings.replica_num;
params.access_to_check = required_access;
backup_settings.copySettingsToQuery(*backup_query);
// executeDDLQueryOnCluster() will return without waiting for completion
mutable_context->setSetting("distributed_ddl_task_timeout", Field{0});
mutable_context->setSetting("distributed_ddl_output_mode", Field{"none"});
executeDDLQueryOnCluster(backup_query, mutable_context, params);
/// Wait until all the hosts have written their backup entries.
auto all_hosts = BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
backup_coordination->waitStatus(all_hosts, kCompletedCoordinationStatus);
}
else
{
backup_query->setCurrentDatabase(context_in_use->getCurrentDatabase());
/// Prepare backup entries.
BackupEntries backup_entries;
{
BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context_in_use};
backup_entries = backup_entries_collector.run();
}
/// Write the backup entries to the backup.
writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool);
/// We have written our backup entries, we need to tell other hosts (they could be waiting for it).
backup_coordination->setStatus(backup_settings.host_id, kCompletedCoordinationStatus, "");
}
/// Finalize backup (write its metadata).
if (!backup_settings.internal)
backup->finalizeWriting();
/// Close the backup.
backup.reset();
setStatus(backup_uuid, BackupStatus::BACKUP_COMPLETE);
}
catch (...)
{
/// Something bad happened, the backup has not built.
setStatus(backup_uuid, BackupStatus::FAILED_TO_BACKUP);
sendErrorToCoordination(backup_coordination, backup_settings.host_id);
if (!async)
throw;
}
};
if (backup_settings.async)
backups_thread_pool.scheduleOrThrowOnError([job]() mutable { job(true); });
else
job(false);
return backup_uuid;
/// Something bad happened, the backup has not been built.
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
throw;
}
}
UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr context)
/// Executes one BACKUP operation: checks access (or dispatches the query ON CLUSTER), opens the backup for writing,
/// collects and writes the backup entries, finalizes a non-internal backup and stores the resulting status.
/// `backup_settings` is taken by value because it is adjusted here (cluster_host_ids, coordination_zk_path).
/// `backup_coordination` may be null, in that case it is created here with makeBackupCoordination().
/// `mutable_context` must be set for ON CLUSTER queries and for asynchronous calls (see the assert below).
/// If `called_async` is true a failure is logged, stored as BACKUP_FAILED and sent to the coordination;
/// otherwise the exception is rethrown to the caller.
void BackupsWorker::doBackup(
const std::shared_ptr<ASTBackupQuery> & backup_query,
const OperationID & backup_id,
BackupSettings backup_settings,
const BackupInfo & backup_info,
std::shared_ptr<IBackupCoordination> backup_coordination,
const ContextPtr & context,
ContextMutablePtr mutable_context,
bool called_async)
{
std::optional<CurrentThread::QueryScope> query_scope;
try
{
/// When called asynchronously, set up a query scope and a thread name first.
if (called_async)
{
query_scope.emplace(mutable_context);
setThreadName("BackupWorker");
}
bool on_cluster = !backup_query->cluster.empty();
assert(mutable_context || (!on_cluster && !called_async));
/// Checks access rights if this is not ON CLUSTER query.
/// (If this is ON CLUSTER query executeDDLQueryOnCluster() will check access rights later.)
auto required_access = getRequiredAccessToBackup(backup_query->elements);
if (!on_cluster)
context->checkAccess(required_access);
ClusterPtr cluster;
if (on_cluster)
{
/// Expand macros in the cluster name, then resolve the cluster and remember its hosts.
backup_query->cluster = context->getMacros()->expand(backup_query->cluster);
cluster = context->getCluster(backup_query->cluster);
backup_settings.cluster_host_ids = cluster->getHostIDs();
/// If no coordination path was specified, build one from the configured root path and the backup's UUID.
if (backup_settings.coordination_zk_path.empty())
{
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_settings.coordination_zk_path = root_zk_path + "/backup-" + toString(*backup_settings.backup_uuid);
}
}
/// Make a backup coordination.
if (!backup_coordination)
backup_coordination = makeBackupCoordination(backup_settings.coordination_zk_path, context, backup_settings.internal);
/// Opens a backup for writing.
BackupFactory::CreateParams backup_create_params;
backup_create_params.open_mode = IBackup::OpenMode::WRITE;
backup_create_params.context = context;
backup_create_params.backup_info = backup_info;
backup_create_params.base_backup_info = backup_settings.base_backup_info;
backup_create_params.compression_method = backup_settings.compression_method;
backup_create_params.compression_level = backup_settings.compression_level;
backup_create_params.password = backup_settings.password;
backup_create_params.is_internal_backup = backup_settings.internal;
backup_create_params.backup_coordination = backup_coordination;
backup_create_params.backup_uuid = backup_settings.backup_uuid;
BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params);
/// Write the backup.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = backup_settings.shard_num;
params.only_replica_num = backup_settings.replica_num;
params.access_to_check = required_access;
backup_settings.copySettingsToQuery(*backup_query);
// executeDDLQueryOnCluster() will return without waiting for completion
mutable_context->setSetting("distributed_ddl_task_timeout", Field{0});
mutable_context->setSetting("distributed_ddl_output_mode", Field{"none"});
executeDDLQueryOnCluster(backup_query, mutable_context, params);
/// Wait until all the hosts have written their backup entries.
auto all_hosts = BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
backup_coordination->waitForStage(all_hosts, Stage::COMPLETED);
}
else
{
backup_query->setCurrentDatabase(context->getCurrentDatabase());
/// Prepare backup entries.
BackupEntries backup_entries;
{
BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context};
backup_entries = backup_entries_collector.run();
}
/// Write the backup entries to the backup.
writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool);
/// We have written our backup entries, we need to tell other hosts (they could be waiting for it).
backup_coordination->setStage(backup_settings.host_id, Stage::COMPLETED, "");
}
/// These values stay zero for an internal backup because finalizeWriting() is skipped for it.
size_t num_files = 0;
UInt64 uncompressed_size = 0;
UInt64 compressed_size = 0;
/// Finalize backup (write its metadata).
if (!backup_settings.internal)
{
backup->finalizeWriting();
num_files = backup->getNumFiles();
uncompressed_size = backup->getUncompressedSize();
compressed_size = backup->getCompressedSize();
}
/// Close the backup.
backup.reset();
LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_info.toString());
setStatus(backup_id, BackupStatus::BACKUP_CREATED);
setNumFilesAndSize(backup_id, num_files, uncompressed_size, compressed_size);
}
catch (...)
{
/// Something bad happened, the backup has not been built.
if (called_async)
{
/// Nobody is waiting for an asynchronous call, so the error is logged and recorded here instead of being rethrown.
tryLogCurrentException(log, fmt::format("Failed to make {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
}
else
{
/// setStatus() and sendCurrentExceptionToCoordination() will be called by startMakingBackup().
throw;
}
}
}
/// Starts executing a RESTORE query and returns the ID of the operation.
///
/// The operation is registered in `infos` with the status RESTORING, then doRestore() is either
/// scheduled on `backups_thread_pool` (if `restore_settings.async` is set) or called directly on the current thread.
/// If anything throws here (including a synchronous doRestore()), the status is set to RESTORE_FAILED,
/// sendCurrentExceptionToCoordination() is called and the exception is rethrown.
OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr context)
{
    auto restore_query = std::static_pointer_cast<ASTBackupQuery>(query->clone());
    auto restore_settings = RestoreSettings::fromRestoreQuery(*restore_query);

    /// `restore_id` will be used as a key to the `infos` map, so it should be unique.
    OperationID restore_id;
    if (restore_settings.internal)
        restore_id = "internal-" + toString(UUIDHelpers::generateV4()); /// Always generate `restore_id` for internal restore to avoid collision if both internal and non-internal restores are on the same host
    else if (!restore_settings.id.empty())
        restore_id = restore_settings.id;
    else
        restore_id = toString(UUIDHelpers::generateV4());

    std::shared_ptr<IRestoreCoordination> restore_coordination;
    if (restore_settings.internal)
    {
        /// The following call of makeRestoreCoordination() is not essential because doRestore() will later create a restore coordination
        /// if it's not created here. However to handle errors better it's better to make a coordination here because this way
        /// if an exception will be thrown in startRestoring() other hosts will know about that.
        restore_coordination = makeRestoreCoordination(restore_settings.coordination_zk_path, context, restore_settings.internal);
    }

    try
    {
        auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
        addInfo(restore_id, backup_info.toString(), restore_settings.internal, BackupStatus::RESTORING);

        /// Prepare context to use.
        ContextMutablePtr context_in_use = context;
        bool on_cluster = !restore_query->cluster.empty();
        if (restore_settings.async || on_cluster)
        {
            /// For ON CLUSTER queries we will need to change some settings.
            /// For ASYNC queries we have to clone the context anyway.
            context_in_use = Context::createCopy(context);
        }

        if (restore_settings.async)
        {
            /// Everything is captured by value because the job outlives this function.
            backups_thread_pool.scheduleOrThrowOnError(
                [this, restore_query, restore_id, restore_settings, backup_info, restore_coordination, context_in_use] {
                    doRestore(
                        restore_query,
                        restore_id,
                        restore_settings,
                        backup_info,
                        restore_coordination,
                        context_in_use,
                        /* called_async= */ true);
                });
        }
        else
        {
            doRestore(
                restore_query,
                restore_id,
                restore_settings,
                backup_info,
                restore_coordination,
                context_in_use,
                /* called_async= */ false);
        }

        return restore_id;
    }
    catch (...)
    {
        /// Something bad happened, the restore has failed.
        setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED);
        sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
        throw;
    }
}
/// Executes a RESTORE query: opens the backup for reading, checks access rights for ON CLUSTER queries,
/// obtains a restore coordination and then either dispatches the query to the cluster or restores on this host.
///
/// `restore_query` is modified through the pointer (its cluster name is expanded, the current database is set,
/// settings are copied into it).
/// `restore_settings` is taken by value because this function changes it
/// (`cluster_host_ids`, `host_id`, `coordination_zk_path`).
/// `restore_coordination` may be null; in that case it is created here by makeRestoreCoordination().
/// `called_async`: if true a query scope is created and the thread is renamed, and any error is logged and
/// recorded here; if false the exception is rethrown to the caller.
void BackupsWorker::doRestore(
const std::shared_ptr<ASTBackupQuery> & restore_query,
const OperationID & restore_id,
RestoreSettings restore_settings,
const BackupInfo & backup_info,
std::shared_ptr<IRestoreCoordination> restore_coordination,
ContextMutablePtr context,
bool called_async)
{
/// Declared outside of the `try` block, so it is still alive while the `catch` block below runs.
std::optional<CurrentThread::QueryScope> query_scope;
try
{
if (called_async)
{
query_scope.emplace(context);
setThreadName("RestoreWorker");
}
/// Open the backup for reading.
BackupFactory::CreateParams backup_open_params;
backup_open_params.open_mode = IBackup::OpenMode::READ;
backup_open_params.context = context;
backup_open_params.backup_info = backup_info;
backup_open_params.base_backup_info = restore_settings.base_backup_info;
backup_open_params.password = restore_settings.password;
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
/// Publish the number of files and the sizes of the opened backup in the operation's info.
setNumFilesAndSize(restore_id, backup->getNumFiles(), backup->getUncompressedSize(), backup->getCompressedSize());
String current_database = context->getCurrentDatabase();
/// Checks access rights if this is ON CLUSTER query.
/// (If this isn't ON CLUSTER query RestorerFromBackup will check access rights later.)
ClusterPtr cluster;
bool on_cluster = !restore_query->cluster.empty();
if (on_cluster)
{
restore_query->cluster = context->getMacros()->expand(restore_query->cluster);
cluster = context->getCluster(restore_query->cluster);
restore_settings.cluster_host_ids = cluster->getHostIDs();
/// We cannot just use access checking provided by the function executeDDLQueryOnCluster(): it would be incorrect
/// because different replicas can contain different set of tables and so the required access rights can differ too.
/// So the right way is pass through the entire cluster and check access for each host.
auto addresses = cluster->filterAddressesByShardOrReplica(restore_settings.shard_num, restore_settings.replica_num);
for (const auto * address : addresses)
{
restore_settings.host_id = address->toString();
/// Work on a copy of the elements so that the query itself is not changed by the per-host database substitution.
auto restore_elements = restore_query->elements;
String addr_database = address->default_database.empty() ? current_database : address->default_database;
for (auto & element : restore_elements)
element.setCurrentDatabase(addr_database);
/// No coordination is passed (nullptr) because this restorer only checks access rights.
RestorerFromBackup dummy_restorer{restore_elements, restore_settings, nullptr, backup, context};
dummy_restorer.run(RestorerFromBackup::CHECK_ACCESS_ONLY);
}
}
/// Make a restore coordination.
if (on_cluster && restore_settings.coordination_zk_path.empty())
{
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
restore_settings.coordination_zk_path = root_zk_path + "/restore-" + toString(UUIDHelpers::generateV4());
}
/// The caller may have already created the coordination; create it here only if it didn't.
if (!restore_coordination)
restore_coordination = makeRestoreCoordination(restore_settings.coordination_zk_path, context, restore_settings.internal);
/// Do RESTORE.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = restore_settings.shard_num;
params.only_replica_num = restore_settings.replica_num;
restore_settings.copySettingsToQuery(*restore_query);
// executeDDLQueryOnCluster() will return without waiting for completion
context->setSetting("distributed_ddl_task_timeout", Field{0});
context->setSetting("distributed_ddl_output_mode", Field{"none"});
executeDDLQueryOnCluster(restore_query, context, params);
/// Wait until all the hosts have reached the COMPLETED stage.
auto all_hosts = BackupSettings::Util::filterHostIDs(
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
restore_coordination->waitForStage(all_hosts, Stage::COMPLETED);
}
else
{
restore_query->setCurrentDatabase(current_database);
/// Restore metadata and prepare data restoring tasks.
DataRestoreTasks data_restore_tasks;
{
RestorerFromBackup restorer{restore_query->elements, restore_settings, restore_coordination,
backup, context};
data_restore_tasks = restorer.run(RestorerFromBackup::RESTORE);
}
/// Execute the data restoring tasks.
restoreTablesData(std::move(data_restore_tasks), restores_thread_pool);
/// We have restored everything, we need to tell other hosts (they could be waiting for it).
restore_coordination->setStage(restore_settings.host_id, Stage::COMPLETED, "");
}
LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_info.toString());
setStatus(restore_id, BackupStatus::RESTORED);
}
catch (...)
{
/// Something bad happened, the restore has failed.
if (called_async)
{
/// There is no caller to rethrow to, so the error is logged and recorded here.
tryLogCurrentException(log, fmt::format("Failed to restore from {} {}", (restore_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED);
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
}
else
{
/// setStatus() and sendCurrentExceptionToCoordination() will be called by startRestoring().
throw;
}
}
}
/// Registers a new backup or restore operation in `infos` under the key `id`.
///
/// `name` is the backup's name, `internal` tells whether the operation was caused by another BACKUP or RESTORE,
/// `status` is the initial status. The start time is set to now; if `status` is already final the end time
/// is set to the same moment.
/// Throws BAD_ARGUMENTS if an operation with the same ID exists and has not reached a final status yet.
void BackupsWorker::addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status)
{
    Info info;
    info.id = id;
    info.name = name;
    info.internal = internal;
    info.status = status;
    info.start_time = std::chrono::system_clock::now();

    if (isFinalStatus(status))
        info.end_time = info.start_time;

    std::lock_guard lock{infos_mutex};

    auto it = infos.find(id);
    if (it != infos.end())
    {
        /// It's better not allow to overwrite the current status if it's in progress.
        auto current_status = it->second.status;
        if (!isFinalStatus(current_status))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot start a backup or restore: ID {} is already in use", id);
    }

    infos[id] = std::move(info);

    num_active_backups += getNumActiveBackupsChange(status);
    num_active_restores += getNumActiveRestoresChange(status);
}
void BackupsWorker::setStatus(const UUID & uuid, BackupStatus status)
void BackupsWorker::setStatus(const String & id, BackupStatus status, bool throw_if_error)
{
std::lock_guard lock{infos_mutex};
auto & info = infos.at(uuid);
info.status = status;
info.status_changed_time = time(nullptr);
if (status == BackupStatus::BACKUP_COMPLETE)
auto it = infos.find(id);
if (it == infos.end())
{
LOG_INFO(log, "{} {} was created successfully", (info.internal ? "Internal backup" : "Backup"), info.backup_name);
}
else if (status == BackupStatus::RESTORED)
{
LOG_INFO(log, "Restored from {} {} successfully", (info.internal ? "internal backup" : "backup"), info.backup_name);
}
else if ((status == BackupStatus::FAILED_TO_BACKUP) || (status == BackupStatus::FAILED_TO_RESTORE))
{
String start_of_message;
if (status == BackupStatus::FAILED_TO_BACKUP)
start_of_message = fmt::format("Failed to create {} {}", (info.internal ? "internal backup" : "backup"), info.backup_name);
if (throw_if_error)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup ID {}", id);
else
start_of_message = fmt::format("Failed to restore from {} {}", (info.internal ? "internal backup" : "backup"), info.backup_name);
tryLogCurrentException(log, start_of_message);
return;
}
auto & info = it->second;
auto old_status = info.status;
info.status = status;
if (isFinalStatus(status))
info.end_time = std::chrono::system_clock::now();
if (isErrorStatus(status))
{
info.error_message = getCurrentExceptionMessage(false);
info.exception = std::current_exception();
}
num_active_backups += getNumActiveBackupsChange(status) - getNumActiveBackupsChange(old_status);
num_active_restores += getNumActiveRestoresChange(status) - getNumActiveRestoresChange(old_status);
}
void BackupsWorker::wait(const UUID & backup_or_restore_uuid, bool rethrow_exception)
/// Stores the number of files and the uncompressed/compressed sizes in the info of the operation `id`.
/// Throws LOGICAL_ERROR if no operation with this ID is registered.
void BackupsWorker::setNumFilesAndSize(const String & id, size_t num_files, UInt64 uncompressed_size, UInt64 compressed_size)
{
    std::lock_guard guard{infos_mutex};

    const auto found = infos.find(id);
    if (found != infos.end())
    {
        auto & entry = found->second;
        entry.num_files = num_files;
        entry.uncompressed_size = uncompressed_size;
        entry.compressed_size = compressed_size;
        return;
    }

    throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup ID {}", id);
}
/// Blocks until the operation registered under `id` reaches a final status.
///
/// Throws LOGICAL_ERROR if the ID is unknown.
/// If `rethrow_exception` is true and the operation is in an error status, the exception stored in its info is rethrown.
void BackupsWorker::wait(const OperationID & id, bool rethrow_exception)
{
    std::unique_lock lock{infos_mutex};
    status_changed.wait(lock, [&]
    {
        /// The predicate runs with `infos_mutex` locked, so `infos` can be read safely here.
        auto it = infos.find(id);
        if (it == infos.end())
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup ID {}", id);
        const auto & info = it->second;
        auto current_status = info.status;
        if (rethrow_exception && isErrorStatus(current_status))
            std::rethrow_exception(info.exception);
        return isFinalStatus(current_status);
    });
}
/// Returns a copy of the info of the operation registered under `id`.
/// Throws LOGICAL_ERROR if the ID is unknown.
BackupsWorker::Info BackupsWorker::getInfo(const OperationID & id) const
{
    std::lock_guard lock{infos_mutex};
    auto it = infos.find(id);
    if (it == infos.end())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup ID {}", id);
    return it->second;
}
@ -451,20 +665,24 @@ std::vector<BackupsWorker::Info> BackupsWorker::getAllInfos() const
std::vector<Info> res_infos;
std::lock_guard lock{infos_mutex};
for (const auto & info : infos | boost::adaptors::map_values)
res_infos.push_back(info);
{
if (!info.internal)
res_infos.push_back(info);
}
return res_infos;
}
/// Waits until both thread pools have finished all their jobs.
/// The two log messages are written only if there were active backups or restores when the function was called.
void BackupsWorker::shutdown()
{
    bool has_active_backups_and_restores = (num_active_backups || num_active_restores);
    if (has_active_backups_and_restores)
        LOG_INFO(log, "Waiting for {} backups and {} restores to be finished", num_active_backups, num_active_restores);

    /// The pools are waited for unconditionally, regardless of the counters checked above.
    backups_thread_pool.wait();
    restores_thread_pool.wait();

    if (has_active_backups_and_restores)
        LOG_INFO(log, "All backup and restore tasks have finished");
}
}

View File

@ -11,6 +11,13 @@ namespace Poco::Util { class AbstractConfiguration; }
namespace DB
{
class ASTBackupQuery;
struct BackupSettings;
struct RestoreSettings;
struct BackupInfo;
class IBackupCoordination;
class IRestoreCoordination;
/// Manager of backups and restores: executes backups and restores' threads in the background.
/// Keeps information about backups and restores started in this session.
class BackupsWorker
@ -21,47 +28,75 @@ public:
/// Waits until all tasks have been completed.
void shutdown();
/// Starts executing a BACKUP or RESTORE query. Returns UUID of the operation.
UUID start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context);
/// Backup's or restore's operation ID, can be either passed via SETTINGS id=... or be randomly generated UUID.
using OperationID = String;
/// Starts executing a BACKUP or RESTORE query. Returns ID of the operation.
OperationID start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context);
/// Waits until a BACKUP or RESTORE query started by start() is finished.
/// The function returns immediately if the operation is already finished.
void wait(const UUID & backup_or_restore_uuid, bool rethrow_exception = true);
void wait(const OperationID & backup_or_restore_id, bool rethrow_exception = true);
/// Information about executing a BACKUP or RESTORE query started by calling start().
struct Info
{
    /// Backup's or restore's operation ID, can be either passed via SETTINGS id=... or be randomly generated UUID.
    OperationID id;

    /// Backup's name, a string like "Disk('backups', 'my_backup')"
    String name;

    /// Whether this operation is internal, i.e. caused by another BACKUP or RESTORE operation.
    /// For example BACKUP ON CLUSTER executes an internal BACKUP commands per each node.
    /// This operation is internal and should not be shown in system.backups
    bool internal = false;

    /// Status of backup or restore operation.
    BackupStatus status;

    /// Number of files in the backup (including backup's metadata; only unique files are counted).
    size_t num_files = 0;

    /// Size of all files in the backup (including backup's metadata; only unique files are counted).
    UInt64 uncompressed_size = 0;

    /// Size of the backup if it's stored as an archive; or the same as `uncompressed_size` if the backup is stored as a folder.
    UInt64 compressed_size = 0;

    /// Set only if there was an error.
    std::exception_ptr exception;
    String error_message;

    /// When the operation was registered, and when it reached a final status.
    std::chrono::system_clock::time_point start_time;
    std::chrono::system_clock::time_point end_time;
};
Info getInfo(const UUID & backup_or_restore_uuid) const;
Info getInfo(const OperationID & id) const;
std::vector<Info> getAllInfos() const;
private:
UUID startMakingBackup(const ASTPtr & query, const ContextPtr & context);
UUID startRestoring(const ASTPtr & query, ContextMutablePtr context);
OperationID startMakingBackup(const ASTPtr & query, const ContextPtr & context);
void addInfo(const UUID & uuid, const String & backup_name, BackupStatus status, bool internal);
void setStatus(const UUID & uuid, BackupStatus status);
void doBackup(const std::shared_ptr<ASTBackupQuery> & backup_query, const OperationID & backup_id, BackupSettings backup_settings,
const BackupInfo & backup_info, std::shared_ptr<IBackupCoordination> backup_coordination, const ContextPtr & context,
ContextMutablePtr mutable_context, bool called_async);
OperationID startRestoring(const ASTPtr & query, ContextMutablePtr context);
void doRestore(const std::shared_ptr<ASTBackupQuery> & restore_query, const OperationID & restore_id, RestoreSettings restore_settings, const BackupInfo & backup_info,
std::shared_ptr<IRestoreCoordination> restore_coordination, ContextMutablePtr context, bool called_async);
void addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status);
void setStatus(const OperationID & id, BackupStatus status, bool throw_if_error = true);
void setStatusSafe(const String & id, BackupStatus status) { setStatus(id, status, false); }
void setNumFilesAndSize(const OperationID & id, size_t num_files, UInt64 uncompressed_size, UInt64 compressed_size);
ThreadPool backups_thread_pool;
ThreadPool restores_thread_pool;
std::unordered_map<UUID, Info> infos;
std::unordered_map<OperationID, Info> infos;
std::condition_variable status_changed;
std::atomic<size_t> num_active_backups = 0;
std::atomic<size_t> num_active_restores = 0;
mutable std::mutex infos_mutex;
Poco::Logger * log;
};

View File

@ -36,6 +36,15 @@ public:
/// Returns UUID of the backup.
virtual UUID getUUID() const = 0;
/// Returns the number of unique files in the backup.
virtual size_t getNumFiles() const = 0;
/// Returns the total size of unique files in the backup.
virtual UInt64 getUncompressedSize() const = 0;
/// Returns the compressed size of the backup. If the backup is not stored as an archive it returns the same as getUncompressedSize().
virtual UInt64 getCompressedSize() const = 0;
/// Returns names of entries stored in a specified directory in the backup.
/// If `directory` is empty or '/' the functions returns entries in the backup's root.
virtual Strings listFiles(const String & directory, bool recursive = false) const = 0;

View File

@ -18,11 +18,11 @@ class IBackupCoordination
public:
virtual ~IBackupCoordination() = default;
/// Sets the current status and waits for other hosts to come to this status too.
virtual void setStatus(const String & current_host, const String & new_status, const String & message) = 0;
virtual void setErrorStatus(const String & current_host, const Exception & exception) = 0;
virtual Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) = 0;
virtual Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) = 0;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void setStage(const String & current_host, const String & new_stage, const String & message) = 0;
virtual void setError(const String & current_host, const Exception & exception) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
struct PartNameAndChecksum
{
@ -115,9 +115,6 @@ public:
/// Returns the list of all the archive suffixes which were generated.
virtual Strings getAllArchiveSuffixes() const = 0;
/// Removes remotely stored information.
virtual void drop() {}
};
}

View File

@ -16,11 +16,11 @@ class IRestoreCoordination
public:
virtual ~IRestoreCoordination() = default;
/// Sets the current status and waits for other hosts to come to this status too.
virtual void setStatus(const String & current_host, const String & new_status, const String & message) = 0;
virtual void setErrorStatus(const String & current_host, const Exception & exception) = 0;
virtual Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) = 0;
virtual Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) = 0;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void setStage(const String & current_host, const String & new_stage, const String & message) = 0;
virtual void setError(const String & current_host, const Exception & exception) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
static constexpr const char * kErrorStatus = "error";
@ -34,9 +34,6 @@ public:
/// Sets that this replica is going to restore a ReplicatedAccessStorage.
/// The function returns false if this access storage is being already restored by another replica.
virtual bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) = 0;
/// Removes remotely stored information.
virtual void drop() {}
};
}

View File

@ -7,20 +7,20 @@ namespace DB
RestoreCoordinationLocal::RestoreCoordinationLocal() = default;
RestoreCoordinationLocal::~RestoreCoordinationLocal() = default;
/// Does nothing in the local implementation.
/// NOTE(review): presumably because a local restore has no other hosts to synchronize with; confirm.
void RestoreCoordinationLocal::setStage(const String &, const String &, const String &)
{
}
/// Does nothing in the local implementation: the error is not stored anywhere.
void RestoreCoordinationLocal::setError(const String &, const Exception &)
{
}
/// Returns immediately with an empty list; the local implementation doesn't wait for anything.
Strings RestoreCoordinationLocal::waitForStage(const Strings &, const String &)
{
    return {};
}
/// Returns immediately with an empty list; the timeout is ignored because the local implementation doesn't wait.
Strings RestoreCoordinationLocal::waitForStage(const Strings &, const String &, std::chrono::milliseconds)
{
    return {};
}

View File

@ -18,11 +18,11 @@ public:
RestoreCoordinationLocal();
~RestoreCoordinationLocal() override;
/// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts.
void setStatus(const String & current_host, const String & new_status, const String & message) override;
void setErrorStatus(const String & current_host, const Exception & exception) override;
Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) override;
Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;

View File

@ -6,57 +6,86 @@
namespace DB
{
/// Creates the root nodes under `zookeeper_path_` and sets up the stage synchronization at `zookeeper_path_ + "/stage"`.
/// `remove_zk_nodes_in_destructor_` is stored and consulted by the destructor.
RestoreCoordinationRemote::RestoreCoordinationRemote(
    const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_)
    : zookeeper_path(zookeeper_path_)
    , get_zookeeper(get_zookeeper_)
    , remove_zk_nodes_in_destructor(remove_zk_nodes_in_destructor_)
{
    createRootNodes();

    /// `stage_sync` is constructed here and not in the initializer list, i.e. after the root nodes have been created.
    /// It gets its ZooKeeper session through getZooKeeper() of this object.
    stage_sync.emplace(
        zookeeper_path_ + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("RestoreCoordination"));
}
/// Removes all the nodes if that was requested in the constructor.
/// Any exception is caught and logged because a destructor must not throw.
RestoreCoordinationRemote::~RestoreCoordinationRemote()
{
    try
    {
        if (remove_zk_nodes_in_destructor)
            removeAllNodes();
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }
}
/// Returns the cached ZooKeeper session; a new one is obtained from `get_zookeeper`
/// if there is no session yet or the cached one has expired. The cache is protected by `mutex`.
zkutil::ZooKeeperPtr RestoreCoordinationRemote::getZooKeeper() const
{
    std::lock_guard guard{mutex};

    const bool session_is_usable = zookeeper && !zookeeper->expired();
    if (session_is_usable)
        return zookeeper;

    zookeeper = get_zookeeper();

    /// It's possible that we connected to different [Zoo]Keeper instance
    /// so we may read a bit stale state.
    zookeeper->sync(zookeeper_path);

    return zookeeper;
}
/// Creates `zookeeper_path` (with its ancestors) and the child nodes used by the acquire*() functions,
/// if they don't exist yet.
void RestoreCoordinationRemote::createRootNodes()
{
    /// The local variable is named `zk` to not shadow the member `zookeeper` used by getZooKeeper().
    auto zk = getZooKeeper();
    zk->createAncestors(zookeeper_path);
    zk->createIfNotExists(zookeeper_path, "");
    zk->createIfNotExists(zookeeper_path + "/repl_databases_tables_acquired", "");
    zk->createIfNotExists(zookeeper_path + "/repl_tables_data_acquired", "");
    zk->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", "");
}
void RestoreCoordinationRemote::setStatus(const String & current_host, const String & new_status, const String & message)
void RestoreCoordinationRemote::setStage(const String & current_host, const String & new_stage, const String & message)
{
status_sync.set(current_host, new_status, message);
stage_sync->set(current_host, new_stage, message);
}
void RestoreCoordinationRemote::setErrorStatus(const String & current_host, const Exception & exception)
void RestoreCoordinationRemote::setError(const String & current_host, const Exception & exception)
{
status_sync.setError(current_host, exception);
stage_sync->setError(current_host, exception);
}
Strings RestoreCoordinationRemote::waitStatus(const Strings & all_hosts, const String & status_to_wait)
Strings RestoreCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait)
{
return status_sync.wait(all_hosts, status_to_wait);
return stage_sync->wait(all_hosts, stage_to_wait);
}
Strings RestoreCoordinationRemote::waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms)
Strings RestoreCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return status_sync.waitFor(all_hosts, status_to_wait, timeout_ms);
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
}
bool RestoreCoordinationRemote::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_databases_tables_acquired/" + escapeForFileName(database_zk_path);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_name);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
@ -65,10 +94,10 @@ bool RestoreCoordinationRemote::acquireCreatingTableInReplicatedDatabase(const S
bool RestoreCoordinationRemote::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_tables_data_acquired/" + escapeForFileName(table_zk_path);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
@ -77,10 +106,10 @@ bool RestoreCoordinationRemote::acquireInsertingDataIntoReplicatedTable(const St
bool RestoreCoordinationRemote::acquireReplicatedAccessStorage(const String & access_storage_zk_path)
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_access_storages_acquired/" + escapeForFileName(access_storage_zk_path);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
@ -89,13 +118,15 @@ bool RestoreCoordinationRemote::acquireReplicatedAccessStorage(const String & ac
void RestoreCoordinationRemote::removeAllNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->removeRecursive(zookeeper_path);
}
/// Usually this function is called by the initiator when a restore operation is complete so we don't need the coordination anymore.
///
/// However there can be a rare situation when this function is called after an error occurs on the initiator of a query
/// while some hosts are still restoring something. Removing all the nodes will remove the parent node of the restore coordination
/// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some part
/// of their restore work before that.
void RestoreCoordinationRemote::drop()
{
removeAllNodes();
auto zk = getZooKeeper();
zk->removeRecursive(zookeeper_path);
}
}

View File

@ -1,7 +1,7 @@
#pragma once
#include <Backups/IRestoreCoordination.h>
#include <Backups/BackupCoordinationStatusSync.h>
#include <Backups/BackupCoordinationStageSync.h>
namespace DB
@ -11,14 +11,14 @@ namespace DB
class RestoreCoordinationRemote : public IRestoreCoordination
{
public:
RestoreCoordinationRemote(const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper);
RestoreCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_);
~RestoreCoordinationRemote() override;
/// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts.
void setStatus(const String & current_host, const String & new_status, const String & message) override;
void setErrorStatus(const String & current_host, const Exception & exception) override;
Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) override;
Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
@ -31,10 +31,8 @@ public:
/// The function returns false if this access storage is being already restored by another replica.
bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) override;
/// Removes remotely stored information.
void drop() override;
private:
zkutil::ZooKeeperPtr getZooKeeper() const;
void createRootNodes();
void removeAllNodes();
@ -42,7 +40,12 @@ private:
const String zookeeper_path;
const zkutil::GetZooKeeper get_zookeeper;
BackupCoordinationStatusSync status_sync;
const bool remove_zk_nodes_in_destructor;
std::optional<BackupCoordinationStageSync> stage_sync;
mutable std::mutex mutex;
mutable zkutil::ZooKeeperPtr zookeeper;
};
}

View File

@ -143,6 +143,7 @@ namespace
/// List of restore settings except base_backup_name and cluster_host_ids.
#define LIST_OF_RESTORE_SETTINGS(M) \
M(String, id) \
M(String, password) \
M(Bool, structure_only) \
M(RestoreTableCreationMode, create_table) \

View File

@ -41,6 +41,9 @@ using RestoreUDFCreationMode = RestoreAccessCreationMode;
/// Settings specified in the "SETTINGS" clause of a RESTORE query.
struct RestoreSettings
{
/// ID of the restore operation, to identify it in the system.backups table. Auto-generated if not set.
String id;
/// Base backup, with this setting we can override the location of the base backup while restoring.
/// Any incremental backup keeps inside the information about its base backup, so using this setting is optional.
std::optional<BackupInfo> base_backup_info;

View File

@ -1,5 +1,6 @@
#include <Backups/RestorerFromBackup.h>
#include <Backups/IRestoreCoordination.h>
#include <Backups/BackupCoordinationStage.h>
#include <Backups/BackupSettings.h>
#include <Backups/IBackup.h>
#include <Backups/IBackupEntry.h>
@ -38,20 +39,10 @@ namespace ErrorCodes
}
namespace Stage = BackupCoordinationStage;
namespace
{
/// Finding databases and tables in the backup which we're going to restore.
constexpr const char * kFindingTablesInBackupStatus = "finding tables in backup";
/// Creating databases or finding them and checking their definitions.
constexpr const char * kCreatingDatabasesStatus = "creating databases";
/// Creating tables or finding them and checking their definition.
constexpr const char * kCreatingTablesStatus = "creating tables";
/// Inserting restored data to tables.
constexpr const char * kInsertingDataToTablesStatus = "inserting data to tables";
/// Uppercases the first character of a passed string.
String toUpperFirst(const String & str)
{
@ -102,6 +93,7 @@ RestorerFromBackup::RestorerFromBackup(
, restore_coordination(restore_coordination_)
, backup(backup_)
, context(context_)
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
, create_table_timeout(context->getConfigRef().getUInt64("backups.create_table_timeout", 300000))
, log(&Poco::Logger::get("RestorerFromBackup"))
{
@ -112,7 +104,7 @@ RestorerFromBackup::~RestorerFromBackup() = default;
RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode)
{
/// run() can be called only once.
if (!current_status.empty())
if (!current_stage.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Already restoring");
/// Find other hosts working along with us to execute this ON CLUSTER query.
@ -126,7 +118,7 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode)
findRootPathsInBackup();
/// Find all the databases and tables which we will read from the backup.
setStatus(kFindingTablesInBackupStatus);
setStage(Stage::FINDING_TABLES_IN_BACKUP);
findDatabasesAndTablesInBackup();
/// Check access rights.
@ -136,27 +128,31 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode)
return {};
/// Create databases using the create queries read from the backup.
setStatus(kCreatingDatabasesStatus);
setStage(Stage::CREATING_DATABASES);
createDatabases();
/// Create tables using the create queries read from the backup.
setStatus(kCreatingTablesStatus);
setStage(Stage::CREATING_TABLES);
createTables();
/// All that's left is to insert data into tables.
/// No more data restoring tasks are allowed after this point.
setStatus(kInsertingDataToTablesStatus);
setStage(Stage::INSERTING_DATA_TO_TABLES);
return getDataRestoreTasks();
}
void RestorerFromBackup::setStatus(const String & new_status, const String & message)
void RestorerFromBackup::setStage(const String & new_stage, const String & message)
{
LOG_TRACE(log, "{}", toUpperFirst(new_status));
current_status = new_status;
LOG_TRACE(log, "{}", toUpperFirst(new_stage));
current_stage = new_stage;
if (restore_coordination)
{
restore_coordination->setStatus(restore_settings.host_id, new_status, message);
restore_coordination->waitStatus(all_hosts, new_status);
restore_coordination->setStage(restore_settings.host_id, new_stage, message);
if (new_stage == Stage::FINDING_TABLES_IN_BACKUP)
restore_coordination->waitForStage(all_hosts, new_stage, on_cluster_first_sync_timeout);
else
restore_coordination->waitForStage(all_hosts, new_stage);
}
}
@ -814,14 +810,14 @@ std::vector<QualifiedTableName> RestorerFromBackup::findTablesWithoutDependencie
void RestorerFromBackup::addDataRestoreTask(DataRestoreTask && new_task)
{
if (current_status == kInsertingDataToTablesStatus)
if (current_stage == Stage::INSERTING_DATA_TO_TABLES)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of data-restoring tasks is not allowed");
data_restore_tasks.push_back(std::move(new_task));
}
void RestorerFromBackup::addDataRestoreTasks(DataRestoreTasks && new_tasks)
{
if (current_status == kInsertingDataToTablesStatus)
if (current_stage == Stage::INSERTING_DATA_TO_TABLES)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of data-restoring tasks is not allowed");
insertAtEnd(data_restore_tasks, std::move(new_tasks));
}

View File

@ -73,6 +73,7 @@ private:
std::shared_ptr<IRestoreCoordination> restore_coordination;
BackupPtr backup;
ContextMutablePtr context;
std::chrono::milliseconds on_cluster_first_sync_timeout;
std::chrono::milliseconds create_table_timeout;
Poco::Logger * log;
@ -100,7 +101,7 @@ private:
DataRestoreTasks getDataRestoreTasks();
void setStatus(const String & new_status, const String & message = "");
void setStage(const String & new_stage, const String & message = "");
struct DatabaseInfo
{
@ -124,7 +125,7 @@ private:
std::vector<QualifiedTableName> findTablesWithoutDependencies() const;
String current_status;
String current_stage;
std::unordered_map<String, DatabaseInfo> database_infos;
std::map<QualifiedTableName, TableInfo> table_infos;
std::vector<DataRestoreTask> data_restore_tasks;

View File

@ -69,6 +69,7 @@
#include <IO/CompressionMethod.h>
#include <Client/InternalTextLogs.h>
#include <boost/algorithm/string/replace.hpp>
#include <IO/ForkWriteBuffer.h>
namespace fs = std::filesystem;
@ -403,7 +404,6 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
return;
processed_rows += block.rows();
/// Even if all blocks are empty, we still need to initialize the output stream to write empty resultset.
initOutputFormat(block, parsed_query);
@ -414,7 +414,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
return;
/// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker.
if (need_render_progress && (stdout_is_a_tty || is_interactive) && !select_into_file)
if (need_render_progress && (stdout_is_a_tty || is_interactive) && (!select_into_file || select_into_file_and_stdout))
progress_indication.clearProgressOutput();
try
@ -434,7 +434,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
/// Restore progress bar after data block.
if (need_render_progress && (stdout_is_a_tty || is_interactive))
{
if (select_into_file)
if (select_into_file && !select_into_file_and_stdout)
std::cerr << "\r";
progress_indication.writeProgress();
}
@ -511,7 +511,7 @@ try
String current_format = format;
select_into_file = false;
select_into_file_and_stdout = false;
/// The query can specify output format or output file.
if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(parsed_query.get()))
{
@ -554,6 +554,13 @@ try
compression_level
);
if (query_with_output->is_into_outfile_with_stdout)
{
select_into_file_and_stdout = true;
out_file_buf = std::make_unique<ForkWriteBuffer>(std::vector<WriteBufferPtr>{std::move(out_file_buf),
std::make_shared<WriteBufferFromFileDescriptor>(STDOUT_FILENO)});
}
// We are writing to file, so default format is the same as in non-interactive mode.
if (is_interactive && is_default_format)
current_format = "TabSeparated";
@ -578,7 +585,7 @@ try
/// It is not clear how to write progress intermixed with data with parallel formatting.
/// It may increase code complexity significantly.
if (!need_render_progress || select_into_file)
if (!need_render_progress || (select_into_file && !select_into_file_and_stdout))
output_format = global_context->getOutputFormatParallelIfPossible(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
else

View File

@ -181,6 +181,7 @@ protected:
String format; /// Query results output format.
bool select_into_file = false; /// If writing result INTO OUTFILE. It affects progress rendering.
bool select_into_file_and_stdout = false; /// If writing result INTO OUTFILE AND STDOUT. It affects progress rendering.
bool is_default_format = true; /// false, if format is set in the config or command line.
size_t format_max_block_size = 0; /// Max block size for console output.
String insert_format; /// Format of INSERT data that is read from stdin in batch mode.

View File

@ -329,9 +329,9 @@ void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
case 0:
{
const auto r = fuzz_rand() % 3;
def.frame_type = r == 0 ? WindowFrame::FrameType::Rows
: r == 1 ? WindowFrame::FrameType::Range
: WindowFrame::FrameType::Groups;
def.frame_type = r == 0 ? WindowFrame::FrameType::ROWS
: r == 1 ? WindowFrame::FrameType::RANGE
: WindowFrame::FrameType::GROUPS;
break;
}
case 1:
@ -385,7 +385,7 @@ void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
break;
}
if (def.frame_type == WindowFrame::FrameType::Range
if (def.frame_type == WindowFrame::FrameType::RANGE
&& def.frame_begin_type == WindowFrame::BoundaryType::Unbounded
&& def.frame_begin_preceding
&& def.frame_end_type == WindowFrame::BoundaryType::Current)

View File

@ -0,0 +1,266 @@
#pragma once
#include <base/types.h>
#include <boost/core/noncopyable.hpp>
#include <mutex>
#include <memory>
#include <list>
#include <condition_variable>
#include <Common/Exception.h>
/// Declaration of the error code thrown by ConcurrencyControl::allocate() on invalid arguments.
/// Only declared here (`extern`); the definition lives elsewhere.
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
}
/*
* Controls how many threads can be allocated for a query (or another activity).
* There is a limited amount of slots for threads. It can be set with `setMaxConcurrency(limit)`.
*
* Lifecycle of a slot: free -> granted -> acquired -> free.
* free: slot is available to be allocated by any query.
* granted: slot is allocated by specific query, but not yet acquired by any thread.
* acquired: slot is allocated by specific query and acquired by a thread.
*
* USAGE:
* 1. Create an allocation for a query:
* `auto slots = ConcurrencyControl::instance().allocate(min, max);`
* It will allocate at least `min` and at most `max` slots.
* Note that `min` slots are granted immediately, but other `max - min` may be granted later.
* 2. For every thread a slot has to be acquired from that allocation:
* `while (auto slot = slots->tryAcquire()) createYourThread([slot = std::move(slot)] { ... });`
* This snippet can be used at query startup and for upscaling later.
* (both functions are non-blocking)
*
* Released slots are distributed between waiting allocations in a round-robin manner to provide fairness.
* Oversubscription is possible: total amount of allocated slots can exceed `setMaxConcurrency(limit)`
* because `min` amount of slots is allocated for each query unconditionally.
*/
// NOTE(review): this class uses std::numeric_limits, std::atomic and std::optional, but the include list
// of this header does not name <limits>, <atomic> or <optional> -- presumably they arrive transitively; confirm.
class ConcurrencyControl : boost::noncopyable
{
public:
struct Allocation;
using AllocationPtr = std::shared_ptr<Allocation>;
using SlotCount = UInt64;
// List of allocations that still want more slots; entries are raw (non-owning) pointers.
// An entry is erased either by schedule() (last slot granted) or by free() (allocation destroyed).
using Waiters = std::list<Allocation *>;
// Initial value of `max_concurrency`, i.e. the limit in effect until setMaxConcurrency() is called.
static constexpr SlotCount Unlimited = std::numeric_limits<SlotCount>::max();
// Scoped guard for acquired slot, see Allocation::tryAcquire()
struct Slot : boost::noncopyable
{
// Returns the slot to the owning allocation (and through it to ConcurrencyControl).
~Slot()
{
allocation->release();
}
private:
friend struct Allocation; // for ctor
explicit Slot(AllocationPtr && allocation_)
: allocation(std::move(allocation_))
{}
// Shared ownership: the allocation cannot be destroyed while any of its slots is alive.
AllocationPtr allocation;
};
// FIXME: have to be unique_ptr, but ThreadFromGlobalPool does not support move semantics yet
using SlotPtr = std::shared_ptr<Slot>;
// Manages group of slots for a single query, see ConcurrencyControl::allocate(min, max)
struct Allocation : std::enable_shared_from_this<Allocation>, boost::noncopyable
{
~Allocation()
{
// We have to lock parent's mutex to avoid race with grant()
// NOTE: shortcut can be added, but it requires Allocation::mutex lock even to check if shortcut is possible
parent.free(this);
}
// Take one already granted slot if available. Lock-free iff there is no granted slot.
// Returns an empty SlotPtr when no granted slot is left.
[[nodiscard]] SlotPtr tryAcquire()
{
SlotCount value = granted.load();
// On CAS failure `value` is refreshed with the current counter, so the loop either
// claims one slot or stops once the counter is observed to be zero.
while (value)
{
if (granted.compare_exchange_strong(value, value - 1))
{
// NOTE(review): the slot is already claimed by the CAS above; what this lock protects
// is not evident from this block -- confirm before removing or relying on it.
std::unique_lock lock{mutex};
return SlotPtr(new Slot(shared_from_this())); // can't use std::make_shared due to private ctor
}
}
return {}; // avoid unnecessary locking
}
// Number of slots that are granted but not yet acquired (a snapshot of the atomic counter).
SlotCount grantedCount() const
{
return granted;
}
private:
friend struct Slot; // for release()
friend class ConcurrencyControl; // for grant(), free() and ctor
// `granted_` slots are granted right away; `limit_` is the maximum the allocation may ever get.
// `waiter_` is dereferenced only if more slots are still required (allocated < limit);
// in that case it must point to the placeholder entry created by ConcurrencyControl::allocate().
Allocation(ConcurrencyControl & parent_, SlotCount limit_, SlotCount granted_, Waiters::iterator waiter_ = {})
: parent(parent_)
, limit(limit_)
, allocated(granted_)
, granted(granted_)
, waiter(waiter_)
{
if (allocated < limit)
*waiter = this;
}
// Called by ConcurrencyControl::free() (i.e. from the destructor, with parent's mutex held).
// Returns a pair: the number of allocated slots not yet released,
// and the waiters-list iterator if the allocation is still waiting for more slots (empty optional otherwise).
auto cancel()
{
std::unique_lock lock{mutex};
return std::pair{allocated - released,
allocated < limit ?
std::optional<Waiters::iterator>(waiter) :
std::optional<Waiters::iterator>()};
}
// Grant single slot to allocation, returns true iff more slot(s) are required
bool grant()
{
std::unique_lock lock{mutex};
granted++;
allocated++;
return allocated < limit;
}
// Release one slot and grant it to other allocation if required
void release()
{
// The parent's counter is updated (and waiters are rescheduled) before the local bookkeeping below.
parent.release(1);
std::unique_lock lock{mutex};
released++;
// Releasing more slots than were ever allocated is treated as a fatal bookkeeping error.
if (released > allocated)
abort();
}
ConcurrencyControl & parent;
const SlotCount limit;
std::mutex mutex; // the following values must be accessed under this mutex
SlotCount allocated; // allocated total (including already `released`)
SlotCount released = 0;
std::atomic<SlotCount> granted; // allocated, but not yet acquired
const Waiters::iterator waiter; // iterator to itself in Waiters list; valid iff allocated < limit
};
public:
// Starts with an empty waiters list, so the round-robin pointer is the end iterator.
ConcurrencyControl()
: cur_waiter(waiters.end())
{}
// WARNING: all Allocation objects MUST be destructed before ConcurrencyControl
// NOTE: Recommended way to achieve this is to use `instance()` and do graceful shutdown of queries
~ConcurrencyControl()
{
// A non-empty waiters list means some Allocation is still alive: abort the process.
if (!waiters.empty())
abort();
}
// Allocate at least `min` and at most `max` slots.
// If not all `max` slots were successfully allocated, a subscription for later allocation is created
// Use `Allocation::tryAcquire()` to acquire allocated slot, before running a thread.
// Throws DB::Exception with LOGICAL_ERROR if `min > max`.
[[nodiscard]] AllocationPtr allocate(SlotCount min, SlotCount max)
{
if (min > max)
throw DB::Exception("ConcurrencyControl: invalid allocation requirements", DB::ErrorCodes::LOGICAL_ERROR);
std::unique_lock lock{mutex};
// Acquire as much slots as we can, but not lower than `min`
SlotCount granted = std::max(min, std::min(max, available(lock)));
cur_concurrency += granted;
// Create allocation and start waiting if more slots are required
// The placeholder entry is inserted right before the round-robin pointer `cur_waiter`.
if (granted < max)
return AllocationPtr(new Allocation(*this, max, granted,
waiters.insert(cur_waiter, nullptr /* pointer is set by Allocation ctor */)));
else
return AllocationPtr(new Allocation(*this, max, granted));
}
// Changes the slot limit (values below 1 are raised to 1) and immediately tries to grant slots to waiters.
void setMaxConcurrency(SlotCount value)
{
std::unique_lock lock{mutex};
max_concurrency = std::max<SlotCount>(1, value); // never allow max_concurrency to be zero
schedule(lock);
}
// Process-wide instance (function-local static).
static ConcurrencyControl & instance()
{
static ConcurrencyControl result;
return result;
}
private:
friend struct Allocation; // for free() and release()
// Called from ~Allocation(): gives back the slots the allocation still holds and stops its waiting, if any.
void free(Allocation * allocation)
{
// Allocation is allowed to be canceled even if there are:
// - `amount`: granted slots (acquired slots are not possible, because Slot holds AllocationPtr)
// - `waiter`: active waiting for more slots to be allocated
// Thus Allocation destruction may require the following lock, to avoid race conditions
std::unique_lock lock{mutex};
auto [amount, waiter] = allocation->cancel();
cur_concurrency -= amount;
if (waiter)
{
// Keep the round-robin pointer valid if it refers to the entry being erased.
if (cur_waiter == *waiter)
cur_waiter = waiters.erase(*waiter);
else
waiters.erase(*waiter);
}
schedule(lock);
}
// Returns `amount` slots to the pool and redistributes them among waiters.
void release(SlotCount amount)
{
std::unique_lock lock{mutex};
cur_concurrency -= amount;
schedule(lock);
}
// Round-robin scheduling of available slots among waiting allocations
// The unnamed lock parameter is not used in the body; it makes callers pass the lock they hold on `mutex`.
void schedule(std::unique_lock<std::mutex> &)
{
while (cur_concurrency < max_concurrency && !waiters.empty())
{
cur_concurrency++;
// Wrap around to the beginning of the list.
if (cur_waiter == waiters.end())
cur_waiter = waiters.begin();
Allocation * allocation = *cur_waiter;
if (allocation->grant())
++cur_waiter;
else
cur_waiter = waiters.erase(cur_waiter); // last required slot has just been granted -- stop waiting
}
}
// Number of slots that can be handed out without exceeding `max_concurrency` (zero if already at or over the limit).
SlotCount available(std::unique_lock<std::mutex> &)
{
if (cur_concurrency < max_concurrency)
return max_concurrency - cur_concurrency;
else
return 0;
}
// Locked by every member function that reads or writes the fields below.
std::mutex mutex;
Waiters waiters;
Waiters::iterator cur_waiter; // round-robin pointer
SlotCount max_concurrency = Unlimited;
SlotCount cur_concurrency = 0;
};

View File

@ -73,7 +73,7 @@ bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const S
/// Check that the right configuration has the same set of subkeys as the left configuration.
Poco::Util::AbstractConfiguration::Keys right_subkeys;
right.keys(right_key, right_subkeys);
std::unordered_set<StringRef> left_subkeys{subkeys.begin(), subkeys.end()};
std::unordered_set<std::string_view> left_subkeys{subkeys.begin(), subkeys.end()};
if ((left_subkeys.size() != right_subkeys.size()) || (left_subkeys.size() != subkeys.size()))
return false;
for (const auto & right_subkey : right_subkeys)

View File

@ -3,12 +3,18 @@
// MemoryTrackerBlockerInThread
thread_local uint64_t MemoryTrackerBlockerInThread::counter = 0;
thread_local VariableContext MemoryTrackerBlockerInThread::level = VariableContext::Global;
/// Remembers the thread's current blocking level in `previous_level`,
/// then installs `level_` as the new thread-local level and bumps the thread-local blocker counter.
MemoryTrackerBlockerInThread::MemoryTrackerBlockerInThread(VariableContext level_)
    : previous_level(level)
{
    level = level_;
    counter += 1;
}
/// Default constructor: delegates to the level-taking constructor with VariableContext::User.
MemoryTrackerBlockerInThread::MemoryTrackerBlockerInThread() : MemoryTrackerBlockerInThread(VariableContext::User)
{
}
MemoryTrackerBlockerInThread::~MemoryTrackerBlockerInThread()
{
--counter;

View File

@ -11,9 +11,12 @@ private:
static thread_local VariableContext level;
VariableContext previous_level;
public:
/// level_ - block in level and above
explicit MemoryTrackerBlockerInThread(VariableContext level_ = VariableContext::User);
explicit MemoryTrackerBlockerInThread(VariableContext level_);
public:
explicit MemoryTrackerBlockerInThread();
~MemoryTrackerBlockerInThread();
MemoryTrackerBlockerInThread(const MemoryTrackerBlockerInThread &) = delete;
@ -23,4 +26,6 @@ public:
{
return counter > 0 && current_level >= level;
}
friend class MemoryTracker;
};

View File

@ -79,7 +79,7 @@ void SystemLogBase<LogElement>::add(const LogElement & element)
/// The size of allocation can be in order of a few megabytes.
/// But this should not be accounted for query memory usage.
/// Otherwise the tests like 01017_uniqCombined_memory_usage.sql will be flaky.
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker(VariableContext::Global);
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
/// Should not log messages under mutex.
bool queue_is_half_full = false;

View File

@ -15,20 +15,31 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
constexpr size_t StringHashTablePadRequirement = 8;
/// TLDList
TLDList::TLDList(size_t size)
: tld_container(size)
, pool(std::make_unique<Arena>(10 << 20))
{}
bool TLDList::insert(StringRef host)
, memory_pool(std::make_unique<Arena>())
{
bool inserted;
tld_container.emplace(DB::ArenaKeyHolder{host, *pool}, inserted);
return inserted;
/// StringHashTable requires keys to be padded to 8 bytes,
/// and Arena (memory_pool here) satisfies this,
/// since it has 15 bytes of padding on the right.
///
/// However, StringHashTable may reference -1 byte of the key,
/// so left padding is also required:
memory_pool->alignedAlloc(StringHashTablePadRequirement, StringHashTablePadRequirement);
}
bool TLDList::has(StringRef host) const
void TLDList::insert(const String & host, TLDType type)
{
return tld_container.has(host);
StringRef owned_host{memory_pool->insert(host.data(), host.size()), host.size()};
tld_container[owned_host] = type;
}
/// Returns the type stored for `host`, or TLDType::TLD_NONE if the host is not in the list.
TLDType TLDList::lookup(StringRef host) const
{
    auto found = tld_container.find(host);
    return found != nullptr ? found->getMapped() : TLDType::TLD_NONE;
}
/// TLDListsHolder
@ -57,32 +68,44 @@ void TLDListsHolder::parseConfig(const std::string & top_level_domains_path, con
size_t TLDListsHolder::parseAndAddTldList(const std::string & name, const std::string & path)
{
std::unordered_set<std::string> tld_list_tmp;
std::unordered_map<std::string, TLDType> tld_list_tmp;
ReadBufferFromFile in(path);
String line;
String buffer;
while (!in.eof())
{
readEscapedStringUntilEOL(line, in);
readEscapedStringUntilEOL(buffer, in);
if (!in.eof())
++in.position();
std::string_view line(buffer);
/// Skip comments
if (line.size() > 2 && line[0] == '/' && line[1] == '/')
if (line.starts_with("//"))
continue;
line = trim(line, [](char c) { return std::isspace(c); });
line = line.substr(0, line.rend() - std::find_if_not(line.rbegin(), line.rend(), ::isspace));
/// Skip empty line
if (line.empty())
continue;
tld_list_tmp.emplace(line);
/// Validate special symbols.
if (line.starts_with("*."))
{
line = line.substr(2);
tld_list_tmp.emplace(line, TLDType::TLD_ANY);
}
else if (line[0] == '!')
{
line = line.substr(1);
tld_list_tmp.emplace(line, TLDType::TLD_EXCLUDE);
}
else
tld_list_tmp.emplace(line, TLDType::TLD_REGULAR);
}
if (!in.eof())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not all list had been read", name);
TLDList tld_list(tld_list_tmp.size());
for (const auto & host : tld_list_tmp)
for (const auto & [host, type] : tld_list_tmp)
{
StringRef host_ref{host.data(), host.size()};
tld_list.insert(host_ref);
tld_list.insert(host, type);
}
size_t tld_list_size = tld_list.size();

View File

@ -2,7 +2,7 @@
#include <base/defines.h>
#include <base/StringRef.h>
#include <Common/HashTable/StringHashSet.h>
#include <Common/HashTable/StringHashMap.h>
#include <Common/Arena.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <mutex>
@ -12,25 +12,35 @@
namespace DB
{
/// Kind of an entry in a custom TLD list.
enum TLDType
{
/// Does not exist marker
/// (the value TLDList::lookup() returns when the host is not found).
TLD_NONE,
/// For regular lines
/// (no special prefix).
TLD_REGULAR,
/// For asterisk (*)
/// (wildcard entries, written with a leading "*." in the list file).
TLD_ANY,
/// For exclamation mark (!)
/// (entries written with a leading "!" in the list file).
TLD_EXCLUDE,
};
/// Custom TLD List
///
/// Unlike tldLookup (which uses gperf) this one uses plain StringHashSet.
/// Unlike tldLookup (which uses gperf) this one uses plain StringHashMap.
class TLDList
{
public:
using Container = StringHashSet<>;
using Container = StringHashMap<TLDType>;
explicit TLDList(size_t size);
/// Return true if the tld_container does not contains such element.
bool insert(StringRef host);
/// Check is there such TLD
bool has(StringRef host) const;
void insert(const String & host, TLDType type);
TLDType lookup(StringRef host) const;
size_t size() const { return tld_container.size(); }
private:
Container tld_container;
std::unique_ptr<Arena> pool;
std::unique_ptr<Arena> memory_pool;
};
class TLDListsHolder
@ -48,6 +58,11 @@ public:
/// - "//" -- comment,
/// - empty lines will be ignored.
///
/// Treats the following special symbols:
/// - "*"
/// - "!"
///
/// Format : https://github.com/publicsuffix/list/wiki/Format
/// Example: https://publicsuffix.org/list/public_suffix_list.dat
///
/// Return size of the list.

View File

@ -9,7 +9,6 @@
#include <Common/StringSearcher.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/UTF8Helpers.h>
#include <base/StringRef.h>
#include <base/unaligned.h>
/** Search for a substring in a string by Volnitsky's algorithm

View File

@ -2,6 +2,7 @@
#include <base/types.h>
#include <Common/Exception.h>
#include <Coordination/KeeperConstants.h>
#include <vector>
#include <memory>
@ -57,6 +58,8 @@ struct Stat
int32_t dataLength{0}; /// NOLINT
int32_t numChildren{0}; /// NOLINT
int64_t pzxid{0};
bool operator==(const Stat &) const = default;
};
enum class Error : int32_t
@ -109,7 +112,6 @@ bool isUserError(Error code);
const char * errorMessage(Error code);
struct Request;
using RequestPtr = std::shared_ptr<Request>;
using Requests = std::vector<RequestPtr>;
@ -516,6 +518,8 @@ public:
const Requests & requests,
MultiCallback callback) = 0;
virtual DB::KeeperApiVersion getApiVersion() = 0;
/// Expire session and finish all pending requests
virtual void finalize(const String & reason) = 0;
};

View File

@ -90,6 +90,11 @@ public:
void finalize(const String & reason) override;
/// This implementation always reports the ZOOKEEPER_COMPATIBLE API version.
DB::KeeperApiVersion getApiVersion() override
{
return KeeperApiVersion::ZOOKEEPER_COMPATIBLE;
}
struct Node
{
String data;

View File

@ -337,17 +337,17 @@ Coordination::Error ZooKeeper::getChildrenImpl(const std::string & path, Strings
}
}
/// Returns the list that tryGetChildren fills in for `path`.
/// All arguments are forwarded to tryGetChildren unchanged; the code it returns
/// is handed to check() together with `path`.
/// NOTE(review): check() presumably throws on an error code - confirm against its definition.
Strings ZooKeeper::getChildren(const std::string & path, Coordination::Stat * stat, const EventPtr & watch)
Strings ZooKeeper::getChildren(const std::string & path, Coordination::Stat * stat, const EventPtr & watch, Coordination::ListRequestType list_request_type)
{
Strings res;
check(tryGetChildren(path, res, stat, watch), path);
check(tryGetChildren(path, res, stat, watch, list_request_type), path);
return res;
}
/// Returns the list that tryGetChildrenWatch fills in for `path`.
/// All arguments (including the watch callback) are forwarded to tryGetChildrenWatch
/// unchanged; the code it returns is handed to check() together with `path`.
/// NOTE(review): check() presumably throws on an error code - confirm against its definition.
Strings ZooKeeper::getChildrenWatch(const std::string & path, Coordination::Stat * stat, Coordination::WatchCallback watch_callback)
Strings ZooKeeper::getChildrenWatch(const std::string & path, Coordination::Stat * stat, Coordination::WatchCallback watch_callback, Coordination::ListRequestType list_request_type)
{
Strings res;
check(tryGetChildrenWatch(path, res, stat, watch_callback), path);
check(tryGetChildrenWatch(path, res, stat, watch_callback, list_request_type), path);
return res;
}
@ -540,7 +540,6 @@ Coordination::Error ZooKeeper::getImpl(const std::string & path, std::string & r
}
}
std::string ZooKeeper::get(const std::string & path, Coordination::Stat * stat, const EventPtr & watch)
{
Coordination::Error code = Coordination::Error::ZOK;
@ -904,6 +903,11 @@ bool ZooKeeper::expired()
return impl->isExpired();
}
/// Returns the Keeper API version reported by the underlying implementation (`impl`).
DB::KeeperApiVersion ZooKeeper::getApiVersion()
{
return impl->getApiVersion();
}
Int64 ZooKeeper::getClientID()
{
return impl->getSessionID();

View File

@ -127,6 +127,8 @@ public:
/// Returns true, if the session has expired.
bool expired();
DB::KeeperApiVersion getApiVersion();
/// Create a znode.
/// Throw an exception if something went wrong.
std::string create(const std::string & path, const std::string & data, int32_t mode);
@ -184,11 +186,13 @@ public:
Strings getChildren(const std::string & path,
Coordination::Stat * stat = nullptr,
const EventPtr & watch = nullptr);
const EventPtr & watch = nullptr,
Coordination::ListRequestType list_request_type = Coordination::ListRequestType::ALL);
Strings getChildrenWatch(const std::string & path,
Coordination::Stat * stat,
Coordination::WatchCallback watch_callback);
Coordination::WatchCallback watch_callback,
Coordination::ListRequestType list_request_type = Coordination::ListRequestType::ALL);
/// Doesn't throw in the following cases:
/// * The node doesn't exist.

View File

@ -724,7 +724,10 @@ void ZooKeeperResponse::fillLogElements(LogElements & elems, size_t idx) const
assert(!elem.xid || elem.xid == xid);
elem.xid = xid;
int32_t response_op = tryGetOpNum();
assert(!elem.op_num || elem.op_num == response_op || response_op < 0);
[[maybe_unused]] const bool is_filtered_list = elem.op_num == static_cast<int32_t>(Coordination::OpNum::FilteredList)
&& response_op == static_cast<int32_t>(Coordination::OpNum::List);
assert(!elem.op_num || elem.op_num == response_op || is_filtered_list || response_op < 0);
elem.op_num = response_op;
elem.zxid = zxid;
@ -892,6 +895,7 @@ ZooKeeperRequestFactory::ZooKeeperRequestFactory()
registerZooKeeperRequest<OpNum::SessionID, ZooKeeperSessionIDRequest>(*this);
registerZooKeeperRequest<OpNum::GetACL, ZooKeeperGetACLRequest>(*this);
registerZooKeeperRequest<OpNum::SetACL, ZooKeeperSetACLRequest>(*this);
registerZooKeeperRequest<OpNum::FilteredList, ZooKeeperFilteredListRequest>(*this);
}
}

Some files were not shown because too many files have changed in this diff Show More