Merge branch 'master' into schema-inference-cache

Commit 381ea139c2 by Kruglov Pavel, 2022-07-27 11:35:36 +02:00 (committed via GitHub)
353 changed files with 7012 additions and 2442 deletions


@ -394,7 +394,7 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=ClickHouse build check (actions)
CHECK_NAME=ClickHouse build check
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
@ -437,7 +437,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -477,7 +477,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, actions)
CHECK_NAME=Stateful tests (debug)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -521,7 +521,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (thread, actions)
CHECK_NAME=Stress test (thread)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -560,7 +560,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
EOF
- name: Download json reports
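All of the `Set envs` steps changed in this commit rely on the same mechanism: every `KEY=VALUE` line appended to the `$GITHUB_ENV` file becomes an environment variable for the subsequent steps of the same job, which is how the renamed `CHECK_NAME` values reach the CI scripts. A minimal sketch of the pattern (the step names and value here are illustrative, not from this diff):

- name: Set envs
  run: |
    # each KEY=VALUE line appended to "$GITHUB_ENV" is exported
    # to every later step of the same job
    cat >> "$GITHUB_ENV" << 'EOF'
    CHECK_NAME=Stateless tests (release)
    EOF
- name: Report check name
  run: |
    echo "$CHECK_NAME"   # prints: Stateless tests (release)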


@ -102,6 +102,9 @@ jobs:
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{ runner.temp }}/style_check
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
EOF
- name: Download changed images
# even if artifact does not exist, e.g. on `do not test` label or failed Docker job
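The `ROBOT_CLICKHOUSE_SSH_KEY<<RCSK` lines added above use the runner's multiline-value syntax for `$GITHUB_ENV`: everything between `NAME<<DELIMITER` and a line containing only `DELIMITER` becomes the variable's value, which is what allows a multi-line SSH private key to be stored in a single variable. A sketch of the same pattern with illustrative names:

- name: Set a multiline env var
  run: |
    # illustrative sketch, not part of this diff
    cat >> "$GITHUB_ENV" << 'EOF'
    MY_MULTILINE_VAR<<DELIM
    first line of the value
    second line of the value
    DELIM
    EOF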


@ -108,7 +108,7 @@ jobs:
- name: Style Check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 style_check.py
python3 style_check.py --no-push
- name: Cleanup
if: always()
run: |
@ -971,7 +971,7 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=ClickHouse build check (actions)
CHECK_NAME=ClickHouse build check
REPORTS_PATH=${{runner.temp}}/reports_dir
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
@ -1020,7 +1020,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse special build check (actions)
CHECK_NAME=ClickHouse special build check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
EOF
- name: Download json reports
@ -1061,7 +1061,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, actions)
CHECK_NAME=Stateless tests (release)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1098,7 +1098,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release_database_ordinary
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, DatabaseOrdinary, actions)
CHECK_NAME=Stateless tests (release, DatabaseOrdinary)
REPO_COPY=${{runner.temp}}/stateless_release_database_ordinary/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1135,7 +1135,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_s3_storage
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, s3 storage, actions)
CHECK_NAME=Stateless tests (release, s3 storage)
REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1172,7 +1172,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (aarch64, actions)
CHECK_NAME=Stateless tests (aarch64)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1209,7 +1209,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1248,7 +1248,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1287,7 +1287,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1326,7 +1326,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1365,7 +1365,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1404,7 +1404,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (ubsan, actions)
CHECK_NAME=Stateless tests (ubsan)
REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1441,7 +1441,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1480,7 +1480,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1519,7 +1519,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1558,7 +1558,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1597,7 +1597,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1636,7 +1636,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1678,7 +1678,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, actions)
CHECK_NAME=Stateful tests (release)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1715,7 +1715,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release_database_ordinary
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, DatabaseOrdinary, actions)
CHECK_NAME=Stateful tests (release, DatabaseOrdinary)
REPO_COPY=${{runner.temp}}/stateful_release_database_ordinary/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1752,7 +1752,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (aarch64, actions)
CHECK_NAME=Stateful tests (aarch64)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1789,7 +1789,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (address, actions)
CHECK_NAME=Stateful tests (address)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1826,7 +1826,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (thread, actions)
CHECK_NAME=Stateful tests (thread)
REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1863,7 +1863,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (memory, actions)
CHECK_NAME=Stateful tests (memory)
REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1900,7 +1900,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (ubsan, actions)
CHECK_NAME=Stateful tests (ubsan)
REPO_COPY=${{runner.temp}}/stateful_ubsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1937,7 +1937,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, actions)
CHECK_NAME=Stateful tests (debug)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1977,7 +1977,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (address, actions)
CHECK_NAME=Stress test (address)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -2017,7 +2017,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (thread, actions)
CHECK_NAME=Stress test (thread)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -2053,7 +2053,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (memory, actions)
CHECK_NAME=Stress test (memory)
REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse
EOF
- name: Download json reports
@ -2089,7 +2089,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_undefined
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (undefined, actions)
CHECK_NAME=Stress test (undefined)
REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse
EOF
- name: Download json reports
@ -2125,7 +2125,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (debug, actions)
CHECK_NAME=Stress test (debug)
REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse
EOF
- name: Download json reports
@ -2164,7 +2164,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
@ -2202,7 +2202,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
@ -2240,7 +2240,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
@ -2278,7 +2278,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=4
@ -2316,7 +2316,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=4
@ -2354,7 +2354,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=4
@ -2392,7 +2392,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=3
RUN_BY_HASH_TOTAL=4
@ -2430,7 +2430,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=2
@ -2468,7 +2468,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=2
@ -2509,7 +2509,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (ASan, actions)
CHECK_NAME=AST fuzzer (ASan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse
EOF
- name: Download json reports
@ -2545,7 +2545,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (TSan, actions)
CHECK_NAME=AST fuzzer (TSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse
EOF
- name: Download json reports
@ -2581,7 +2581,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (UBSan, actions)
CHECK_NAME=AST fuzzer (UBSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse
EOF
- name: Download json reports
@ -2617,7 +2617,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (MSan, actions)
CHECK_NAME=AST fuzzer (MSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse
EOF
- name: Download json reports
@ -2653,7 +2653,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (debug, actions)
CHECK_NAME=AST fuzzer (debug)
REPO_COPY=${{runner.temp}}/ast_fuzzer_debug/ClickHouse
EOF
- name: Download json reports
@ -2692,7 +2692,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (asan, actions)
CHECK_NAME=Unit tests (asan)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@ -2728,7 +2728,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-clang, actions)
CHECK_NAME=Unit tests (release-clang)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@ -2764,7 +2764,7 @@ jobs:
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/unit_tests_asan
# REPORTS_PATH=${{runner.temp}}/reports_dir
# CHECK_NAME=Unit tests (release-gcc, actions)
# CHECK_NAME=Unit tests (release-gcc)
# REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
# EOF
# - name: Download json reports
@ -2800,7 +2800,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (tsan, actions)
CHECK_NAME=Unit tests (tsan)
REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse
EOF
- name: Download json reports
@ -2836,7 +2836,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (msan, actions)
CHECK_NAME=Unit tests (msan)
REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse
EOF
- name: Download json reports
@ -2872,7 +2872,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (ubsan, actions)
CHECK_NAME=Unit tests (ubsan)
REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse
EOF
- name: Download json reports


@ -118,6 +118,9 @@ jobs:
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{ runner.temp }}/style_check
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
EOF
- name: Download changed images
# even if artifact does not exist, e.g. on `do not test` label or failed Docker job
@ -1026,7 +1029,7 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=ClickHouse build check (actions)
CHECK_NAME=ClickHouse build check
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
@ -1075,7 +1078,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse special build check (actions)
CHECK_NAME=ClickHouse special build check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
EOF
- name: Download json reports
@ -1116,7 +1119,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, actions)
CHECK_NAME=Stateless tests (release)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1153,7 +1156,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_database_replicated
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, DatabaseReplicated, actions)
CHECK_NAME=Stateless tests (release, DatabaseReplicated)
REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1192,7 +1195,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_database_replicated
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, DatabaseReplicated, actions)
CHECK_NAME=Stateless tests (release, DatabaseReplicated)
REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1231,7 +1234,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_wide_parts
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, wide parts enabled, actions)
CHECK_NAME=Stateless tests (release, wide parts enabled)
REPO_COPY=${{runner.temp}}/stateless_wide_parts/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1268,7 +1271,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_s3_storage
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, s3 storage, actions)
CHECK_NAME=Stateless tests (release, s3 storage)
REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1305,7 +1308,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (aarch64, actions)
CHECK_NAME=Stateless tests (aarch64)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1342,7 +1345,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1381,7 +1384,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1420,7 +1423,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1459,7 +1462,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1498,7 +1501,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1537,7 +1540,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (ubsan, actions)
CHECK_NAME=Stateless tests (ubsan)
REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1574,7 +1577,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1613,7 +1616,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1652,7 +1655,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1691,7 +1694,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1730,7 +1733,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1769,7 +1772,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1808,7 +1811,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_flaky_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests flaky check (address, actions)
CHECK_NAME=Stateless tests flaky check (address)
REPO_COPY=${{runner.temp}}/stateless_flaky_asan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1844,7 +1847,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/tests_bugfix_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Tests bugfix validate check (actions)
CHECK_NAME=tests bugfix validate check
KILL_TIMEOUT=3600
REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse
EOF
@ -1866,12 +1869,12 @@ jobs:
TEMP_PATH="${TEMP_PATH}/integration" \
REPORTS_PATH="${REPORTS_PATH}/integration" \
python3 integration_test_check.py "Integration tests bugfix validate check" \
python3 integration_test_check.py "Integration $CHECK_NAME" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'
TEMP_PATH="${TEMP_PATH}/stateless" \
REPORTS_PATH="${REPORTS_PATH}/stateless" \
python3 functional_test_check.py "Stateless tests bugfix validate check" "$KILL_TIMEOUT" \
python3 functional_test_check.py "Stateless $CHECK_NAME" "$KILL_TIMEOUT" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'
python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/post_commit_status.tsv" "${TEMP_PATH}/integration/post_commit_status.tsv"
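The two rewritten invocations above now derive their check names from the `CHECK_NAME` variable set a few lines earlier, so the arguments expand to the same strings that were previously hardcoded:

# illustrative shell expansion, not part of this diff
CHECK_NAME='tests bugfix validate check'
echo "Integration $CHECK_NAME"   # Integration tests bugfix validate check
echo "Stateless $CHECK_NAME"     # Stateless tests bugfix validate check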
@ -1895,7 +1898,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, actions)
CHECK_NAME=Stateful tests (release)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1932,7 +1935,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (aarch64, actions)
CHECK_NAME=Stateful tests (aarch64)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1969,7 +1972,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (address, actions)
CHECK_NAME=Stateful tests (address)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2006,7 +2009,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (thread, actions)
CHECK_NAME=Stateful tests (thread)
REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2043,7 +2046,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (memory, actions)
CHECK_NAME=Stateful tests (memory)
REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2080,7 +2083,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (ubsan, actions)
CHECK_NAME=Stateful tests (ubsan)
REPO_COPY=${{runner.temp}}/stateful_ubsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2117,7 +2120,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, actions)
CHECK_NAME=Stateful tests (debug)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2157,7 +2160,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (address, actions)
CHECK_NAME=Stress test (address)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -2197,7 +2200,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (thread, actions)
CHECK_NAME=Stress test (thread)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -2233,7 +2236,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (memory, actions)
CHECK_NAME=Stress test (memory)
REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse
EOF
- name: Download json reports
@ -2269,7 +2272,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_undefined
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (undefined, actions)
CHECK_NAME=Stress test (undefined)
REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse
EOF
- name: Download json reports
@ -2305,7 +2308,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (debug, actions)
CHECK_NAME=Stress test (debug)
REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse
EOF
- name: Download json reports
@ -2344,7 +2347,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (ASan, actions)
CHECK_NAME=AST fuzzer (ASan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse
EOF
- name: Download json reports
@ -2380,7 +2383,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (TSan, actions)
CHECK_NAME=AST fuzzer (TSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse
EOF
- name: Download json reports
@ -2416,7 +2419,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (UBSan, actions)
CHECK_NAME=AST fuzzer (UBSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse
EOF
- name: Download json reports
@ -2452,7 +2455,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (MSan, actions)
CHECK_NAME=AST fuzzer (MSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse
EOF
- name: Download json reports
@ -2488,7 +2491,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (debug, actions)
CHECK_NAME=AST fuzzer (debug)
REPO_COPY=${{runner.temp}}/ast_fuzzer_debug/ClickHouse
EOF
- name: Download json reports
@ -2527,7 +2530,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
@ -2565,7 +2568,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
@ -2603,7 +2606,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
@ -2641,7 +2644,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=4
@ -2679,7 +2682,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=4
@ -2717,7 +2720,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=4
@ -2755,7 +2758,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=3
RUN_BY_HASH_TOTAL=4
@ -2793,7 +2796,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=2
@ -2831,7 +2834,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=2
@ -2869,7 +2872,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan_flaky_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests flaky check (asan, actions)
CHECK_NAME=Integration tests flaky check (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan_flaky_check/ClickHouse
EOF
- name: Download json reports
@ -2908,7 +2911,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (asan, actions)
CHECK_NAME=Unit tests (asan)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@ -2944,7 +2947,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-clang, actions)
CHECK_NAME=Unit tests (release-clang)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@ -2980,7 +2983,7 @@ jobs:
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/unit_tests_asan
# REPORTS_PATH=${{runner.temp}}/reports_dir
# CHECK_NAME=Unit tests (release-gcc, actions)
# CHECK_NAME=Unit tests (release-gcc)
# REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
# EOF
# - name: Download json reports
@ -3016,7 +3019,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (tsan, actions)
CHECK_NAME=Unit tests (tsan)
REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse
EOF
- name: Download json reports
@ -3052,7 +3055,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (msan, actions)
CHECK_NAME=Unit tests (msan)
REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse
EOF
- name: Download json reports
@ -3088,7 +3091,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (ubsan, actions)
CHECK_NAME=Unit tests (ubsan)
REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse
EOF
- name: Download json reports


@ -473,7 +473,7 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=ClickHouse build check (actions)
CHECK_NAME=ClickHouse build check
REPORTS_PATH=${{runner.temp}}/reports_dir
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
@ -517,7 +517,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, actions)
CHECK_NAME=Stateless tests (release)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -554,7 +554,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (aarch64, actions)
CHECK_NAME=Stateless tests (aarch64)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -591,7 +591,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -630,7 +630,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -669,7 +669,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -708,7 +708,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -747,7 +747,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -786,7 +786,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (ubsan, actions)
CHECK_NAME=Stateless tests (ubsan)
REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -823,7 +823,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -862,7 +862,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -901,7 +901,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -940,7 +940,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -979,7 +979,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1018,7 +1018,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1060,7 +1060,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, actions)
CHECK_NAME=Stateful tests (release)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1097,7 +1097,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (aarch64, actions)
CHECK_NAME=Stateful tests (aarch64)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1134,7 +1134,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (address, actions)
CHECK_NAME=Stateful tests (address)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1171,7 +1171,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (thread, actions)
CHECK_NAME=Stateful tests (thread)
REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1208,7 +1208,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (memory, actions)
CHECK_NAME=Stateful tests (memory)
REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1245,7 +1245,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (ubsan, actions)
CHECK_NAME=Stateful tests (ubsan)
REPO_COPY=${{runner.temp}}/stateful_ubsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1282,7 +1282,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, actions)
CHECK_NAME=Stateful tests (debug)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1322,7 +1322,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (address, actions)
CHECK_NAME=Stress test (address)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -1362,7 +1362,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (thread, actions)
CHECK_NAME=Stress test (thread)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -1398,7 +1398,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (memory, actions)
CHECK_NAME=Stress test (memory)
REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse
EOF
- name: Download json reports
@ -1434,7 +1434,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_undefined
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (undefined, actions)
CHECK_NAME=Stress test (undefined)
REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse
EOF
- name: Download json reports
@ -1470,7 +1470,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (debug, actions)
CHECK_NAME=Stress test (debug)
REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse
EOF
- name: Download json reports
@ -1509,7 +1509,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
@ -1547,7 +1547,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
@ -1585,7 +1585,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
@ -1623,7 +1623,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=4
@ -1661,7 +1661,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=4
@ -1699,7 +1699,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=4
@ -1737,7 +1737,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=3
RUN_BY_HASH_TOTAL=4
@ -1775,7 +1775,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=2
@ -1813,7 +1813,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=2

.gitmodules (6 changed lines)

@ -259,6 +259,9 @@
[submodule "contrib/minizip-ng"]
path = contrib/minizip-ng
url = https://github.com/zlib-ng/minizip-ng
[submodule "contrib/qpl"]
path = contrib/qpl
url = https://github.com/intel/qpl.git
[submodule "contrib/wyhash"]
path = contrib/wyhash
url = https://github.com/wangyi-fudan/wyhash.git
@ -274,9 +277,6 @@
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing.git
[submodule "contrib/base-x"]
path = contrib/base-x
url = https://github.com/ClickHouse/base-x.git
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/ClickHouse/c-ares


@ -1,17 +1,18 @@
### Table of Contents
**[ClickHouse release v22.7, 2022-07-21](#226)**<br>
**[ClickHouse release v22.6, 2022-06-16](#226)**<br>
**[ClickHouse release v22.5, 2022-05-19](#225)**<br>
**[ClickHouse release v22.4, 2022-04-20](#224)**<br>
**[ClickHouse release v22.3-lts, 2022-03-17](#223)**<br>
**[ClickHouse release v22.2, 2022-02-17](#222)**<br>
**[ClickHouse release v22.1, 2022-01-18](#221)**<br>
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**<br>
**[ClickHouse release v22.7, 2022-07-21](#227)**<br/>
**[ClickHouse release v22.6, 2022-06-16](#226)**<br/>
**[ClickHouse release v22.5, 2022-05-19](#225)**<br/>
**[ClickHouse release v22.4, 2022-04-20](#224)**<br/>
**[ClickHouse release v22.3-lts, 2022-03-17](#223)**<br/>
**[ClickHouse release v22.2, 2022-02-17](#222)**<br/>
**[ClickHouse release v22.1, 2022-01-18](#221)**<br/>
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**<br/>
### <a id="227"></a> ClickHouse release 22.7, 2022-07-21
#### Upgrade Notes
* Enable setting `enable_positional_arguments` by default. It allows queries like `SELECT ... ORDER BY 1, 2` where 1, 2 are the references to the select clause. If you need to return the old behavior, disable this setting. [#38204](https://github.com/ClickHouse/ClickHouse/pull/38204) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disable `format_csv_allow_single_quotes` by default. See [#37096](https://github.com/ClickHouse/ClickHouse/issues/37096). ([Kruglov Pavel](https://github.com/Avogar)).
* `Ordinary` database engine and old storage definition syntax for `*MergeTree` tables are deprecated. By default it's not possible to create new databases with `Ordinary` engine. If `system` database has `Ordinary` engine it will be automatically converted to `Atomic` on server startup. There are settings to keep old behavior (`allow_deprecated_database_ordinary` and `allow_deprecated_syntax_for_merge_tree`), but these settings may be removed in future releases. [#38335](https://github.com/ClickHouse/ClickHouse/pull/38335) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Force rewriting comma join to inner by default (set default value `cross_to_inner_join_rewrite = 2`). To keep the old behavior, set `cross_to_inner_join_rewrite = 1`. [#39326](https://github.com/ClickHouse/ClickHouse/pull/39326) ([Vladimir C](https://github.com/vdimir)). If you encounter any incompatibilities, you can revert this setting.
@ -21,7 +22,8 @@
* Added full sorting merge join algorithm. [#35796](https://github.com/ClickHouse/ClickHouse/pull/35796) ([Vladimir C](https://github.com/vdimir)).
* Implement NATS table engine, which allows publishing to and subscribing to NATS subjects. Closes [#32388](https://github.com/ClickHouse/ClickHouse/issues/32388). [#37171](https://github.com/ClickHouse/ClickHouse/pull/37171) ([tchepavel](https://github.com/tchepavel)). ([Kseniia Sumarokova](https://github.com/kssenii))
* Implement table function `mongodb`. Allow writes into `MongoDB` storage / table function. [#37213](https://github.com/ClickHouse/ClickHouse/pull/37213) ([aaapetrenko](https://github.com/aaapetrenko)). ([Kseniia Sumarokova](https://github.com/kssenii))
* Add SQLInsert output format. Closes [#38441](https://github.com/ClickHouse/ClickHouse/issues/38441). [#38477](https://github.com/ClickHouse/ClickHouse/pull/38477) ([Kruglov Pavel](https://github.com/Avogar)).
* Add `SQLInsert` output format. Closes [#38441](https://github.com/ClickHouse/ClickHouse/issues/38441). [#38477](https://github.com/ClickHouse/ClickHouse/pull/38477) ([Kruglov Pavel](https://github.com/Avogar)).
* Introduced setting `additional_table_filters`. Using this setting, you can specify an additional filtering condition for a table, applied directly after reading. Example: `select number, x, y from (select number from system.numbers limit 5) f any left join (select x, y from table_1) s on f.number = s.x settings additional_table_filters={'system.numbers' : 'number != 3', 'table_1' : 'x != 2'}`. Also introduced setting `additional_result_filter`, which specifies an additional filtering condition for the query result. Closes [#37918](https://github.com/ClickHouse/ClickHouse/issues/37918). [#38475](https://github.com/ClickHouse/ClickHouse/pull/38475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Add `compatibility` setting and `system.settings_changes` system table that contains information about changes in settings through ClickHouse versions. Closes [#35972](https://github.com/ClickHouse/ClickHouse/issues/35972). [#38957](https://github.com/ClickHouse/ClickHouse/pull/38957) ([Kruglov Pavel](https://github.com/Avogar)).
* Add functions `translate(string, from_string, to_string)` and `translateUTF8(string, from_string, to_string)`. They replace characters in the input string according to the positional mapping from `from_string` to `to_string`. [#38935](https://github.com/ClickHouse/ClickHouse/pull/38935) ([Nikolay Degterinsky](https://github.com/evillique)).
* Support `parseTimeDelta` function. The characters ` ;-+,:` can be used as separators, e.g. `1yr-2mo`, `2m:6s`: `SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ; 33 seconds')`. [#39071](https://github.com/ClickHouse/ClickHouse/pull/39071) ([jiahui-97](https://github.com/jiahui-97)).
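For the `translate` entry above, a quick illustration (an example query, not from the changelog): each character of `from_string` found in the input is replaced by the character at the same position in `to_string`.

SELECT translate('Hello', 'el', 'ip')   -- returns 'Hippo' ('e' -> 'i', 'l' -> 'p')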
@ -35,7 +37,7 @@
* Add `send_logs_source_regexp` setting to send only server text logs whose source name matches the specified regexp (empty means all sources). [#39161](https://github.com/ClickHouse/ClickHouse/pull/39161) ([Amos Bird](https://github.com/amosbird)).
* Support `ALTER` for `Hive` tables. [#38214](https://github.com/ClickHouse/ClickHouse/pull/38214) ([lgbo](https://github.com/lgbo-ustc)).
* Support `isNullable` function. This function checks whether its argument is nullable and returns 1 or 0. Closes [#38611](https://github.com/ClickHouse/ClickHouse/issues/38611). [#38841](https://github.com/ClickHouse/ClickHouse/pull/38841) ([lokax](https://github.com/lokax)).
* Added Base58 encoding/decoding. [#38159](https://github.com/ClickHouse/ClickHouse/pull/38159) ([Andrey Zvonov](https://github.com/zvonand)).
* Added functions for base58 encoding/decoding. [#38159](https://github.com/ClickHouse/ClickHouse/pull/38159) ([Andrey Zvonov](https://github.com/zvonand)).
* Add chart visualization to Play UI. [#38197](https://github.com/ClickHouse/ClickHouse/pull/38197) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Added L2 Squared distance and norm functions for both arrays and tuples. [#38545](https://github.com/ClickHouse/ClickHouse/pull/38545) ([Julian Gilyadov](https://github.com/israelg99)).
* Add ability to pass HTTP headers to the `url` table function / storage via SQL. Closes [#37897](https://github.com/ClickHouse/ClickHouse/issues/37897). [#38176](https://github.com/ClickHouse/ClickHouse/pull/38176) ([Kseniia Sumarokova](https://github.com/kssenii)).
@ -47,7 +49,8 @@
#### Performance Improvement
* Distinct optimization for sorted columns. A specialized DISTINCT transformation is used when the input stream is sorted by the column(s) in the DISTINCT clause. The optimization can be applied to pre-distinct, final distinct, or both. Initial implementation by @dimarub2000. [#37803](https://github.com/ClickHouse/ClickHouse/pull/37803) ([Igor Nikonov](https://github.com/devcrafter)).
* Improve performance of `ORDER BY`, `MergeTree` merges, window functions using batch version of `BinaryHeap`. [#38022](https://github.com/ClickHouse/ClickHouse/pull/38022) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix significant join performance regression which was introduced in https://github.com/ClickHouse/ClickHouse/pull/35616 . It's interesting that common join queries such as ssb queries have been 10 times slower for almost 3 months while no one complained. [#38052](https://github.com/ClickHouse/ClickHouse/pull/38052) ([Amos Bird](https://github.com/amosbird)).
* More parallel execution for queries with `FINAL` [#36396](https://github.com/ClickHouse/ClickHouse/pull/36396) ([Nikita Taranov](https://github.com/nickitat)).
* Fix significant join performance regression which was introduced in [#35616](https://github.com/ClickHouse/ClickHouse/pull/35616). It's interesting that common join queries such as ssb queries have been 10 times slower for almost 3 months while no one complained. [#38052](https://github.com/ClickHouse/ClickHouse/pull/38052) ([Amos Bird](https://github.com/amosbird)).
* Migrate from the Intel hyperscan library to vectorscan; this speeds up string matching on non-x86 platforms. [#38171](https://github.com/ClickHouse/ClickHouse/pull/38171) ([Robert Schulze](https://github.com/rschu1ze)).
* Increased parallelism of query plan steps executed after aggregation. [#38295](https://github.com/ClickHouse/ClickHouse/pull/38295) ([Nikita Taranov](https://github.com/nickitat)).
* Improve performance of insertion to columns of type `JSON`. [#38320](https://github.com/ClickHouse/ClickHouse/pull/38320) ([Anton Popov](https://github.com/CurtizJ)).
@ -59,7 +62,6 @@
* `ORDER BY (a, b)` will use all the same benefits as `ORDER BY a, b`. [#38873](https://github.com/ClickHouse/ClickHouse/pull/38873) ([Igor Nikonov](https://github.com/devcrafter)).
* Align branches within a 32B boundary to make benchmarks more stable. [#38988](https://github.com/ClickHouse/ClickHouse/pull/38988) ([Guo Wangyang](https://github.com/guowangy)). It improves performance by 1-2% on average on Intel CPUs.
* Executable UDF, executable dictionaries, and Executable tables will avoid wasting one second during wait for subprocess termination. [#38929](https://github.com/ClickHouse/ClickHouse/pull/38929) ([Constantine Peresypkin](https://github.com/pkit)).
* Push down filters to the right side of a sorting merge join. [#39123](https://github.com/ClickHouse/ClickHouse/pull/39123) ([Vladimir C](https://github.com/vdimir)).
* Optimize accesses to `system.stack_trace` table if not all columns are selected. [#39177](https://github.com/ClickHouse/ClickHouse/pull/39177) ([Azat Khuzhin](https://github.com/azat)).
* Improve isNullable/isConstant/isNull/isNotNull performance for a `LowCardinality` argument. [#39192](https://github.com/ClickHouse/ClickHouse/pull/39192) ([Kruglov Pavel](https://github.com/Avogar)).
* Optimized processing of ORDER BY in window functions. [#34632](https://github.com/ClickHouse/ClickHouse/pull/34632) ([Vladimir Chebotarev](https://github.com/excitoon)).

View File

@ -554,6 +554,16 @@ macro (clickhouse_add_executable target)
endif()
endmacro()
# When cross-compiling, all targets are built for the target platform, which usually differs from the
# host platform. This is problematic if a build artifact X (e.g. a file or an executable) is generated by running
# another executable Y previously produced in the build. This is solved by compiling and running Y for/on
# the host platform. Add such targets to the list:
# add_native_target(<target> ...)
set_property (GLOBAL PROPERTY NATIVE_BUILD_TARGETS)
function (add_native_target)
set_property (GLOBAL APPEND PROPERTY NATIVE_BUILD_TARGETS ${ARGV})
endfunction (add_native_target)
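# A hypothetical usage sketch (not part of this change): a tool that must run
# during the build is declared as usual and then registered as a native target:
#     add_executable (my_codegen main.cpp)
#     add_native_target (my_codegen)
# When cross-compiling, my_codegen will additionally be configured and built
# for the host in the nested CMake invocation at the end of this file.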
set(ConfigIncludePath ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.")
include_directories(${ConfigIncludePath})
@ -568,3 +578,33 @@ add_subdirectory (tests)
add_subdirectory (utils)
include (cmake/sanitize_target_link_libraries.cmake)
# Build native targets if necessary
get_property(NATIVE_BUILD_TARGETS GLOBAL PROPERTY NATIVE_BUILD_TARGETS)
if (NATIVE_BUILD_TARGETS
AND NOT(
CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME
AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR
)
)
message (STATUS "Building native targets...")
set (NATIVE_BUILD_DIR "${CMAKE_BINARY_DIR}/native")
execute_process(
COMMAND ${CMAKE_COMMAND} -E make_directory "${NATIVE_BUILD_DIR}"
COMMAND_ECHO STDOUT)
execute_process(
COMMAND ${CMAKE_COMMAND}
"-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
"-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
${CMAKE_SOURCE_DIR}
WORKING_DIRECTORY "${NATIVE_BUILD_DIR}"
COMMAND_ECHO STDOUT)
execute_process(
COMMAND ${CMAKE_COMMAND} --build "${NATIVE_BUILD_DIR}" --target ${NATIVE_BUILD_TARGETS}
COMMAND_ECHO STDOUT)
endif ()

View File

@ -669,18 +669,18 @@ std::string JSON::getName() const
return getString();
}
StringRef JSON::getRawString() const
std::string_view JSON::getRawString() const
{
Pos s = ptr_begin;
if (*s != '"')
throw JSONException(std::string("JSON: expected \", got ") + *s);
while (++s != ptr_end && *s != '"');
if (s != ptr_end)
return StringRef(ptr_begin + 1, s - ptr_begin - 1);
return std::string_view(ptr_begin + 1, s - ptr_begin - 1);
throw JSONException("JSON: incorrect syntax (expected end of string, found end of JSON).");
}
StringRef JSON::getRawName() const
std::string_view JSON::getRawName() const
{
return getRawString();
}

View File

@ -136,8 +136,8 @@ public:
std::string getName() const; /// Get the name of the name-value pair.
JSON getValue() const; /// Get the value of the name-value pair.
StringRef getRawString() const;
StringRef getRawName() const;
std::string_view getRawString() const;
std::string_view getRawName() const;
/// Get the element's value; if the element is a string, parse its value from the string; if it is not a string or a number, throw an exception.
double toDouble() const;

View File

@ -1,68 +1,192 @@
#include <sys/auxv.h>
#include "atomic.h"
#include <unistd.h> // __environ
#include <sys/auxv.h>
#include <fcntl.h> // open, O_RDONLY
#include <sys/stat.h>
#include <unistd.h> // read, close
#include <stdlib.h> // abort
#include <stdio.h> // perror, fprintf
#include <link.h> // ElfW
#include <errno.h>
// We don't have libc struct available here. Compute aux vector manually.
static unsigned long * __auxv = NULL;
static unsigned long __auxv_secure = 0;
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
static size_t __find_auxv(unsigned long type)
/// Suppress TSan since it is possible for this code to be called from multiple threads,
/// and initialization is safe to be done multiple times from multiple threads.
#if defined(__clang__)
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
#else
# define NO_SANITIZE_THREAD
#endif
// We don't have libc struct available here.
// Compute aux vector manually (from /proc/self/auxv).
//
// Right now there are only 51 AT_* constants,
// so 64 should be enough until this implementation is replaced with musl.
static unsigned long __auxv_procfs[64];
static unsigned long __auxv_secure = 0;
// Common
static unsigned long * __auxv_environ = NULL;
static void * volatile getauxval_func;
static unsigned long __auxv_init_environ(unsigned long type);
//
// auxv from procfs interface
//
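// Like read(2), but retries on EINTR and aborts the process on any other error.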
ssize_t __retry_read(int fd, void * buf, size_t count)
{
for (;;)
{
ssize_t ret = read(fd, buf, count);
if (ret == -1)
{
if (errno == EINTR)
{
continue;
}
perror("Cannot read /proc/self/auxv");
abort();
}
return ret;
}
}
unsigned long NO_SANITIZE_THREAD __getauxval_procfs(unsigned long type)
{
if (type == AT_SECURE)
{
return __auxv_secure;
}
if (type >= ARRAY_SIZE(__auxv_procfs))
{
errno = ENOENT;
return 0;
}
return __auxv_procfs[type];
}
static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type)
{
// For debugging:
// - od -t dL /proc/self/auxv
// - LD_SHOW_AUXV=1 ls
int fd = open("/proc/self/auxv", O_RDONLY);
// Opening can fail in case of:
// - procfs not mounted
// - on Android, you are not able to read it unless running from a shell or a debugger
// - some other issues
if (fd == -1)
{
// Fallback to environ.
a_cas_p(&getauxval_func, (void *)__auxv_init_procfs, (void *)__auxv_init_environ);
return __auxv_init_environ(type);
}
ElfW(auxv_t) aux;
/// NOTE: sizeof(aux) is very small (less than PAGE_SIZE), so a partial read should not be possible.
_Static_assert(sizeof(aux) < 4096, "Unexpected sizeof(aux)");
while (__retry_read(fd, &aux, sizeof(aux)) == sizeof(aux))
{
if (aux.a_type == AT_NULL)
{
break;
}
if (aux.a_type == AT_IGNORE || aux.a_type == AT_IGNOREPPC)
{
continue;
}
if (aux.a_type >= ARRAY_SIZE(__auxv_procfs))
{
fprintf(stderr, "AT_* is out of range: %li (maximum allowed is %zu)\n", aux.a_type, ARRAY_SIZE(__auxv_procfs));
abort();
}
if (__auxv_procfs[aux.a_type])
{
/// It is possible due to race on initialization.
}
__auxv_procfs[aux.a_type] = aux.a_un.a_val;
}
close(fd);
__auxv_secure = __getauxval_procfs(AT_SECURE);
// Now we've initialized __auxv_procfs, next time getauxval() will only call __get_auxval().
a_cas_p(&getauxval_func, (void *)__auxv_init_procfs, (void *)__getauxval_procfs);
return __getauxval_procfs(type);
}
//
// auxv from environ interface
//
// NOTE: environ is available only after static initializers have run,
// so you cannot rely on this if you need getauxval() earlier.
//
// A good example of such a user is the sanitizers; for example,
// LSan will not work with __auxv_init_environ(),
// since it needs getauxval() before that.
//
static size_t NO_SANITIZE_THREAD __find_auxv(unsigned long type)
{
size_t i;
for (i = 0; __auxv[i]; i += 2)
for (i = 0; __auxv_environ[i]; i += 2)
{
if (__auxv[i] == type)
if (__auxv_environ[i] == type)
{
return i + 1;
}
}
return (size_t) -1;
}
unsigned long __getauxval(unsigned long type)
unsigned long NO_SANITIZE_THREAD __getauxval_environ(unsigned long type)
{
if (type == AT_SECURE)
return __auxv_secure;
if (__auxv)
if (__auxv_environ)
{
size_t index = __find_auxv(type);
if (index != ((size_t) -1))
return __auxv[index];
return __auxv_environ[index];
}
errno = ENOENT;
return 0;
}
static void * volatile getauxval_func;
static unsigned long __auxv_init(unsigned long type)
static unsigned long NO_SANITIZE_THREAD __auxv_init_environ(unsigned long type)
{
if (!__environ)
{
// __environ is not initialized yet so we can't initialize __auxv right now.
// __environ is not initialized yet so we can't initialize __auxv_environ right now.
// That normally happens only when getauxval() is called from some sanitizer's internal code.
errno = ENOENT;
return 0;
}
// Initialize __auxv and __auxv_secure.
// Initialize __auxv_environ and __auxv_secure.
size_t i;
for (i = 0; __environ[i]; i++);
__auxv = (unsigned long *) (__environ + i + 1);
__auxv_environ = (unsigned long *) (__environ + i + 1);
size_t secure_idx = __find_auxv(AT_SECURE);
if (secure_idx != ((size_t) -1))
__auxv_secure = __auxv[secure_idx];
__auxv_secure = __auxv_environ[secure_idx];
// Now we've initialized __auxv, next time getauxval() will only call __get_auxval().
a_cas_p(&getauxval_func, (void *)__auxv_init, (void *)__getauxval);
// Now we need to switch to __getauxval_environ for all later calls, since
// everything is initialized.
a_cas_p(&getauxval_func, (void *)__auxv_init_environ, (void *)__getauxval_environ);
return __getauxval(type);
return __getauxval_environ(type);
}
// First time getauxval() will call __auxv_init().
static void * volatile getauxval_func = (void *)__auxv_init;
// Callchain:
// - __auxv_init_procfs -> __getauxval_procfs
// - __auxv_init_procfs -> __auxv_init_environ -> __getauxval_environ
static void * volatile getauxval_func = (void *)__auxv_init_procfs;
unsigned long getauxval(unsigned long type)
{

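The code above implements a lazily initialized function pointer: the very first call lands in an init routine, which fills a cache and then atomically swings the pointer to a fast path, so all later calls skip initialization entirely. A minimal standalone sketch of the same pattern, assuming C11 atomics in place of musl's internal a_cas_p (all names below are illustrative, not taken from the file above):

#include <stdatomic.h>
#include <stdio.h>

typedef unsigned long (*handler_fn)(unsigned long);

static unsigned long cached_value; // filled on the first call

// Fast path: used once initialization has happened.
static unsigned long fast_path(unsigned long x)
{
    return cached_value + x;
}

static unsigned long init_path(unsigned long x);

// The first call goes through init_path; later calls go straight to fast_path.
static _Atomic handler_fn dispatch = init_path;

static unsigned long init_path(unsigned long x)
{
    cached_value = 42; // stands in for parsing /proc/self/auxv
    handler_fn expected = init_path;
    // If several threads race here, each runs the (idempotent) initialization,
    // and the CAS makes sure the switch to fast_path happens exactly once.
    atomic_compare_exchange_strong(&dispatch, &expected, fast_path);
    return fast_path(x);
}

unsigned long lookup(unsigned long x)
{
    return atomic_load(&dispatch)(x);
}

int main(void)
{
    printf("%lu\n", lookup(1)); // triggers initialization, prints 43
    printf("%lu\n", lookup(2)); // fast path, prints 44
    return 0;
}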
View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54464)
SET(VERSION_REVISION 54465)
SET(VERSION_MAJOR 22)
SET(VERSION_MINOR 7)
SET(VERSION_MINOR 8)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 7000c4e0033bb9e69050ab8ef73e8e7465f78059)
SET(VERSION_DESCRIBE v22.7.1.1-testing)
SET(VERSION_STRING 22.7.1.1)
SET(VERSION_GITHASH f4f05ec786a8b8966dd0ea2a2d7e39a8c7db24f4)
SET(VERSION_DESCRIBE v22.8.1.1-testing)
SET(VERSION_STRING 22.8.1.1)
# end of autochange

View File

@ -156,8 +156,8 @@ endif()
add_contrib (sqlite-cmake sqlite-amalgamation)
add_contrib (s2geometry-cmake s2geometry)
add_contrib (base-x-cmake base-x)
add_contrib(c-ares-cmake c-ares)
add_contrib (c-ares-cmake c-ares)
add_contrib (qpl-cmake qpl)
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear

2
contrib/avro vendored

@ -1 +1 @@
Subproject commit e43c46e87fd32eafdc09471e95344555454c5ef8
Subproject commit 7832659ec986075d560f930c288e973c64679552

1
contrib/base-x vendored

@ -1 +0,0 @@
Subproject commit a85f98fb4ed52c2f4029a4b6ac1ef0bafdfc56f5

View File

@ -1,28 +0,0 @@
option (ENABLE_BASEX "Enable base-x" ${ENABLE_LIBRARIES})
if (NOT ENABLE_BASEX)
message(STATUS "Not using base-x")
return()
endif()
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base-x")
set (SRCS
${LIBRARY_DIR}/base_x.hh
${LIBRARY_DIR}/uinteger_t.hh
)
add_library(_base-x INTERFACE)
target_include_directories(_base-x SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/base-x")
if (XCODE OR XCODE_VERSION)
# https://gitlab.kitware.com/cmake/cmake/issues/17457
# Some native build systems may not like targets that have only object files, so consider adding at least one real source file
# This applies to Xcode.
if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "")
endif ()
target_sources(_base-x PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
endif ()
add_library(ch_contrib::base-x ALIAS _base-x)

2
contrib/grpc vendored

@ -1 +1 @@
Subproject commit 5e23e96c0c02e451dbb291cf9f66231d02b6cdb6
Subproject commit 3f975ecab377cd5f739af780566596128f17bb74

1
contrib/qpl vendored Submodule

@ -0,0 +1 @@
Subproject commit cdc8442f7a5e7a6ff6eea39c69665e0c5034d85d

View File

@ -0,0 +1,322 @@
## Intel® QPL provides high-performance implementations of data processing functions for the existing hardware accelerator, and/or a software path in case the hardware accelerator is not available.
if (OS_LINUX AND ARCH_AMD64 AND (ENABLE_AVX2 OR ENABLE_AVX512))
option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES})
elseif(ENABLE_QPL)
message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with avx2/avx512 support")
endif()
if (NOT ENABLE_QPL)
message(STATUS "Not using QPL")
return()
endif()
set (QPL_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl")
set (QPL_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl/sources")
set (QPL_BINARY_DIR "${ClickHouse_BINARY_DIR}/build/contrib/qpl")
set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake")
set (EFFICIENT_WAIT ON)
set (BLOCK_ON_FAULT ON)
set (LOG_HW_INIT OFF)
set (SANITIZE_MEMORY OFF)
set (SANITIZE_THREADS OFF)
set (LIB_FUZZING_ENGINE OFF)
function(GetLibraryVersion _content _outputVar)
string(REGEX MATCHALL "Qpl VERSION (.+) LANGUAGES" VERSION_REGEX "${_content}")
SET(${_outputVar} ${CMAKE_MATCH_1} PARENT_SCOPE)
endfunction()
FILE(READ "${QPL_PROJECT_DIR}/CMakeLists.txt" HEADER_CONTENT)
GetLibraryVersion("${HEADER_CONTENT}" QPL_VERSION)
message(STATUS "Intel QPL version: ${QPL_VERSION}")
# There are 5 source subdirectories under $QPL_SRC_DIR: isal, c_api, core-sw, core-iaa, middle-layer.
# Generate 6 library targets: isal, isal_asm, qplcore_px, qplcore_avx512, core_iaa, middle_layer_lib.
# Output ch_contrib::qpl by linking these targets into the final _qpl library.
include("${QPL_PROJECT_DIR}/cmake/CompileOptions.cmake")
# check nasm compiler
include(CheckLanguage)
check_language(ASM_NASM)
if(NOT CMAKE_ASM_NASM_COMPILER)
message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because NASM compiler can not be found!")
endif()
# [SUBDIR]isal
enable_language(ASM_NASM)
set(ISAL_C_SRC ${QPL_SRC_DIR}/isal/igzip/adler32_base.c
${QPL_SRC_DIR}/isal/igzip/huff_codes.c
${QPL_SRC_DIR}/isal/igzip/hufftables_c.c
${QPL_SRC_DIR}/isal/igzip/igzip.c
${QPL_SRC_DIR}/isal/igzip/igzip_base.c
${QPL_SRC_DIR}/isal/igzip/flatten_ll.c
${QPL_SRC_DIR}/isal/igzip/encode_df.c
${QPL_SRC_DIR}/isal/igzip/igzip_icf_base.c
${QPL_SRC_DIR}/isal/igzip/igzip_inflate.c
${QPL_SRC_DIR}/isal/igzip/igzip_icf_body.c
${QPL_SRC_DIR}/isal/crc/crc_base.c
${QPL_SRC_DIR}/isal/crc/crc64_base.c)
set(ISAL_ASM_SRC ${QPL_SRC_DIR}/isal/igzip/igzip_body.asm
${QPL_SRC_DIR}/isal/igzip/igzip_gen_icf_map_lh1_04.asm
${QPL_SRC_DIR}/isal/igzip/igzip_gen_icf_map_lh1_06.asm
${QPL_SRC_DIR}/isal/igzip/igzip_decode_block_stateless_04.asm
${QPL_SRC_DIR}/isal/igzip/igzip_finish.asm
${QPL_SRC_DIR}/isal/igzip/encode_df_04.asm
${QPL_SRC_DIR}/isal/igzip/encode_df_06.asm
${QPL_SRC_DIR}/isal/igzip/igzip_decode_block_stateless_01.asm
${QPL_SRC_DIR}/isal/igzip/proc_heap.asm
${QPL_SRC_DIR}/isal/igzip/igzip_icf_body_h1_gr_bt.asm
${QPL_SRC_DIR}/isal/igzip/igzip_icf_finish.asm
${QPL_SRC_DIR}/isal/igzip/igzip_inflate_multibinary.asm
${QPL_SRC_DIR}/isal/igzip/igzip_update_histogram_01.asm
${QPL_SRC_DIR}/isal/igzip/igzip_update_histogram_04.asm
${QPL_SRC_DIR}/isal/igzip/rfc1951_lookup.asm
${QPL_SRC_DIR}/isal/igzip/adler32_sse.asm
${QPL_SRC_DIR}/isal/igzip/adler32_avx2_4.asm
${QPL_SRC_DIR}/isal/igzip/igzip_deflate_hash.asm
${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_04.asm
${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_06.asm
${QPL_SRC_DIR}/isal/igzip/igzip_multibinary.asm
${QPL_SRC_DIR}/isal/igzip/stdmac.asm
${QPL_SRC_DIR}/isal/crc/crc_multibinary.asm
${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8.asm
${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8_02.asm
${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by16_10.asm
${QPL_SRC_DIR}/isal/crc/crc32_ieee_01.asm
${QPL_SRC_DIR}/isal/crc/crc32_ieee_02.asm
${QPL_SRC_DIR}/isal/crc/crc32_ieee_by4.asm
${QPL_SRC_DIR}/isal/crc/crc32_ieee_by16_10.asm
${QPL_SRC_DIR}/isal/crc/crc32_iscsi_00.asm
${QPL_SRC_DIR}/isal/crc/crc32_iscsi_01.asm
${QPL_SRC_DIR}/isal/crc/crc32_iscsi_by16_10.asm)
# Adding ISA-L library target
add_library(isal OBJECT ${ISAL_C_SRC})
add_library(isal_asm OBJECT ${ISAL_ASM_SRC})
# Setting external and internal interfaces for ISA-L library
target_include_directories(isal
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/isal/include>
PRIVATE ${QPL_SRC_DIR}/isal/include
PUBLIC ${QPL_SRC_DIR}/isal/igzip)
target_compile_options(isal PRIVATE
"$<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}>"
"$<$<CONFIG:Debug>:>"
"$<$<CONFIG:Release>:>")
target_compile_options(isal_asm PUBLIC "-I${QPL_SRC_DIR}/isal/include/"
PUBLIC "-I${QPL_SRC_DIR}/isal/igzip/"
PUBLIC "-I${QPL_SRC_DIR}/isal/crc/"
PUBLIC "-DQPL_LIB")
# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available".
# AS_FEATURE_LEVEL=5 means "Check SIMD capabilities of the target system at runtime and use up to AVX2 if available".
# HAVE_AS_KNOWS_AVX512 means we rely on AVX512 being available on the target system.
if (ENABLE_AVX512)
target_compile_options(isal_asm PUBLIC "-DHAVE_AS_KNOWS_AVX512" "-DAS_FEATURE_LEVEL=10")
else()
target_compile_options(isal_asm PUBLIC "-DAS_FEATURE_LEVEL=5")
endif()
# We must remove "-fno-sanitize=undefined" from COMPILE_OPTIONS here,
# otherwise the nasm compiler fails because it does not recognize "-fno-sanitize=undefined".
if (SANITIZE STREQUAL "undefined")
get_target_property(target_options isal_asm COMPILE_OPTIONS)
list(REMOVE_ITEM target_options "-fno-sanitize=undefined")
set_property(TARGET isal_asm PROPERTY COMPILE_OPTIONS ${target_options})
endif()
target_compile_definitions(isal PUBLIC
QPL_LIB
NDEBUG)
# [SUBDIR]core-sw
# Two libraries, qplcore_avx512 and qplcore_px, are created for the SW fallback; they are implemented with AVX512 and non-AVX512 instructions respectively.
# The upper-level QPL API checks the SIMD capabilities of the target system at runtime and decides whether to call the AVX512 or the non-AVX512 function.
# Hence, we don't need to put qplcore_avx512 under an ENABLE_AVX512 CMake switch here.
# Actually, if we did that, undefined-symbol errors would occur because both the AVX512 and the non-AVX512 functions are referenced by the QPL API.
# PLATFORM=2 means the AVX512 implementation; PLATFORM=0 means the non-AVX512 implementation.
# Find Core Sources
file(GLOB SOURCES
${QPL_SRC_DIR}/core-sw/src/checksums/*.c
${QPL_SRC_DIR}/core-sw/src/filtering/*.c
${QPL_SRC_DIR}/core-sw/src/other/*.c
${QPL_SRC_DIR}/core-sw/src/compression/*.c)
file(GLOB DATA_SOURCES
${QPL_SRC_DIR}/core-sw/src/data/*.c)
# Create avx512 library
add_library(qplcore_avx512 OBJECT ${SOURCES})
target_compile_definitions(qplcore_avx512 PRIVATE PLATFORM=2)
target_include_directories(qplcore_avx512
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/compression/include>
PRIVATE $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(qplcore_avx512 PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>)
target_link_libraries(qplcore_avx512 ${CMAKE_DL_LIBS} isal)
target_compile_options(qplcore_avx512
PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
PRIVATE -march=skylake-avx512
PRIVATE "$<$<CONFIG:Debug>:>"
PRIVATE "$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>")
target_compile_definitions(qplcore_avx512 PUBLIC QPL_BADARG_CHECK)
#
# Create px library
#
#set(CMAKE_INCLUDE_CURRENT_DIR ON)
# Create library
add_library(qplcore_px OBJECT ${SOURCES} ${DATA_SOURCES})
target_compile_definitions(qplcore_px PRIVATE PLATFORM=0)
target_include_directories(qplcore_px
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/compression/include>
PRIVATE $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(qplcore_px PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>)
target_link_libraries(qplcore_px isal ${CMAKE_DL_LIBS})
target_compile_options(qplcore_px
PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
PRIVATE "$<$<CONFIG:Debug>:>"
PRIVATE "$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>")
target_compile_definitions(qplcore_px PUBLIC QPL_BADARG_CHECK)
# [SUBDIR]core-iaa
file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c
${QPL_SRC_DIR}/core-iaa/sources/aecs/*.cpp
${QPL_SRC_DIR}/core-iaa/sources/driver_loader/*.c
${QPL_SRC_DIR}/core-iaa/sources/driver_loader/*.cpp
${QPL_SRC_DIR}/core-iaa/sources/descriptors/*.c
${QPL_SRC_DIR}/core-iaa/sources/descriptors/*.cpp
${QPL_SRC_DIR}/core-iaa/sources/bit_rev.c)
# Create library
add_library(core_iaa OBJECT ${HW_PATH_SRC})
target_include_directories(core_iaa
PRIVATE ${UUID_DIR}
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/include>
PRIVATE $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/sources/include>
PRIVATE $<TARGET_PROPERTY:qplcore_avx512,INTERFACE_INCLUDE_DIRECTORIES>)
target_compile_options(core_iaa
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>>)
target_compile_features(core_iaa PRIVATE c_std_11)
target_compile_definitions(core_iaa PRIVATE QPL_BADARG_CHECK
PRIVATE $<$<BOOL:${BLOCK_ON_FAULT}>: BLOCK_ON_FAULT_ENABLED>
PRIVATE $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>)
# [SUBDIR]middle-layer
generate_unpack_kernel_arrays(${QPL_BINARY_DIR})
file(GLOB MIDDLE_LAYER_SRC
${QPL_SRC_DIR}/middle-layer/analytics/*.cpp
${QPL_SRC_DIR}/middle-layer/c_wrapper/*.cpp
${QPL_SRC_DIR}/middle-layer/checksum/*.cpp
${QPL_SRC_DIR}/middle-layer/common/*.cpp
${QPL_SRC_DIR}/middle-layer/compression/*.cpp
${QPL_SRC_DIR}/middle-layer/compression/*/*.cpp
${QPL_SRC_DIR}/middle-layer/compression/*/*/*.cpp
${QPL_SRC_DIR}/middle-layer/dispatcher/*.cpp
${QPL_SRC_DIR}/middle-layer/other/*.cpp
${QPL_SRC_DIR}/middle-layer/util/*.cpp
${QPL_SRC_DIR}/middle-layer/inflate/*.cpp
${QPL_SRC_DIR}/core-iaa/sources/accelerator/*.cpp) # todo
file(GLOB GENERATED_PX_TABLES_SRC ${QPL_BINARY_DIR}/generated/px_*.cpp)
file(GLOB GENERATED_AVX512_TABLES_SRC ${QPL_BINARY_DIR}/generated/avx512_*.cpp)
add_library(middle_layer_lib OBJECT
${GENERATED_PX_TABLES_SRC}
${GENERATED_AVX512_TABLES_SRC}
${MIDDLE_LAYER_SRC})
target_compile_options(middle_layer_lib
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>>
PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>)
target_compile_definitions(middle_layer_lib
PUBLIC QPL_VERSION="${QPL_VERSION}"
PUBLIC $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>
PUBLIC $<$<BOOL:${EFFICIENT_WAIT}>:QPL_EFFICIENT_WAIT>
PUBLIC QPL_BADARG_CHECK)
set_source_files_properties(${GENERATED_PX_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=0)
set_source_files_properties(${GENERATED_AVX512_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=2)
target_include_directories(middle_layer_lib
PRIVATE ${UUID_DIR}
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/middle-layer>
PUBLIC $<TARGET_PROPERTY:_qpl,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:qplcore_px,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:qplcore_avx512,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:core_iaa,INTERFACE_INCLUDE_DIRECTORIES>)
target_compile_definitions(middle_layer_lib PUBLIC -DQPL_LIB)
# [SUBDIR]c_api
file(GLOB_RECURSE QPL_C_API_SRC
${QPL_SRC_DIR}/c_api/*.c
${QPL_SRC_DIR}/c_api/*.cpp)
add_library(_qpl STATIC ${QPL_C_API_SRC}
$<TARGET_OBJECTS:isal>
$<TARGET_OBJECTS:isal_asm>
$<TARGET_OBJECTS:qplcore_px>
$<TARGET_OBJECTS:qplcore_avx512>
$<TARGET_OBJECTS:core_iaa>
$<TARGET_OBJECTS:middle_layer_lib>)
target_include_directories(_qpl
PUBLIC $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include/>
PRIVATE $<TARGET_PROPERTY:middle_layer_lib,INTERFACE_INCLUDE_DIRECTORIES>
PRIVATE $<BUILD_INTERFACE:${QPL_SRC_DIR}/c_api>)
target_compile_options(_qpl
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>>
PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>)
target_compile_definitions(_qpl
PRIVATE -DQPL_LIB
PRIVATE -DQPL_BADARG_CHECK
PUBLIC -DENABLE_QPL_COMPRESSION)
target_link_libraries(_qpl
PRIVATE ${CMAKE_DL_LIBS})
add_library (ch_contrib::qpl ALIAS _qpl)
target_include_directories(_qpl SYSTEM BEFORE PUBLIC "${QPL_PROJECT_DIR}/include")
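For context: the resulting `ch_contrib::qpl` target exposes Intel QPL's job-based C API from `<qpl/qpl.h>`. Below is a rough consumer-side sketch of a software-path compression call; the function and field names follow Intel's public QPL documentation, but treat the exact signatures as assumptions rather than something this build file guarantees.

#include <qpl/qpl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    const uint8_t src[] = "hello hello hello hello hello";
    uint8_t dst[256];

    /* The job structure size depends on the execution path,
       so the caller queries it first and allocates the job itself. */
    uint32_t job_size = 0;
    if (qpl_get_job_size(qpl_path_software, &job_size) != QPL_STS_OK)
        return 1;

    qpl_job * job = (qpl_job *) malloc(job_size);
    if (qpl_init_job(qpl_path_software, job) != QPL_STS_OK)
        return 1;

    job->op            = qpl_op_compress;
    job->next_in_ptr   = (uint8_t *) src;
    job->available_in  = sizeof(src);
    job->next_out_ptr  = dst;
    job->available_out = sizeof(dst);
    job->level         = qpl_default_level;
    job->flags         = QPL_FLAG_FIRST | QPL_FLAG_LAST | QPL_FLAG_DYNAMIC_HUFFMAN;

    if (qpl_execute_job(job) == QPL_STS_OK)
        printf("compressed %zu bytes into %u bytes\n", sizeof(src), job->total_out);

    qpl_fini_job(job);
    free(job);
    return 0;
}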

View File

@ -0,0 +1,4 @@
#ifndef _QPL_UUID_UUID_H
#define _QPL_UUID_UUID_H
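/* Minimal stand-in for <uuid/uuid.h>: only the uuid_t type is provided, which is all the QPL sources need here. */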
typedef unsigned char uuid_t[16];
#endif /* _QPL_UUID_UUID_H */

View File

@ -51,6 +51,7 @@ RUN apt-get update \
rename \
software-properties-common \
tzdata \
nasm \
--yes --no-install-recommends \
&& apt-get clean
@ -118,5 +119,20 @@ ENV GOCACHE=/workdir/
RUN mkdir /workdir && chmod 777 /workdir
WORKDIR /workdir
# FIXME: thread sanitizer is broken in clang-14, we have to build it with clang-13
# https://github.com/ClickHouse/ClickHouse/pull/39450
# https://github.com/google/sanitizers/issues/1540
# https://github.com/google/sanitizers/issues/1552
RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-13 main" >> \
/etc/apt/sources.list.d/clang.list \
&& apt-get update \
&& apt-get install \
clang-13 \
clang-tidy-13 \
--yes --no-install-recommends \
&& apt-get clean
COPY build.sh /
CMD ["bash", "-c", "/build.sh 2>&1"]

View File

@ -323,6 +323,7 @@ if __name__ == "__main__":
parser.add_argument(
"--compiler",
choices=(
"clang-13", # For TSAN builds, see #39450
"clang-14",
"clang-14-darwin",
"clang-14-darwin-aarch64",

View File

@ -55,6 +55,7 @@ RUN apt-get update \
pkg-config \
tzdata \
pv \
nasm \
--yes --no-install-recommends
# Sanitizer options for services (clickhouse-server)

View File

@ -72,6 +72,7 @@ RUN apt-get update \
tzdata \
unixodbc \
file \
nasm \
--yes --no-install-recommends
RUN pip3 install numpy scipy pandas Jinja2

View File

@ -7,29 +7,26 @@ set -x
# Thread Fuzzer allows checking more permutations of possible thread scheduling
# and finding more potential issues.
#
# But under the thread fuzzer the TSan build is too slow, and this produced some flaky
# tests, so for now, as a temporary solution, it has been disabled.
if ! test -f package_folder/clickhouse-server*tsan*.deb; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
fi
function install_packages()
{

View File

@ -17,7 +17,9 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
shellcheck \
yamllint \
&& pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff pylint==2.6.2
&& pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff pylint==2.6.2 \
&& apt-get clean \
&& rm -rf /root/.cache/pip
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH

View File

@ -40,10 +40,10 @@ def process_result(result_folder):
def write_results(results_file, status_file, results, status):
with open(results_file, "w") as f:
with open(results_file, "w", encoding="utf-8") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, "w") as f:
with open(status_file, "w", encoding="utf-8") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
@ -53,9 +53,10 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of style check"
)
parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
default_dir = "/test_output"
parser.add_argument("--in-results-dir", default=default_dir)
parser.add_argument("--out-results-file", default=f"{default_dir}/test_results.tsv")
parser.add_argument("--out-status-file", default=f"{default_dir}/check_status.tsv")
args = parser.parse_args()
state, description, test_results = process_result(args.in_results_dir)

View File

@ -0,0 +1,36 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.6.4.35-stable FIXME as compared to v22.6.3.35-stable
#### Build/Testing/Packaging Improvement
* Backported in [#38822](https://github.com/ClickHouse/ClickHouse/issues/38822): Change `all|noarch` packages to architecture-dependent ones, fix some documentation for them, and push aarch64/arm64 packages to artifactory and release assets. Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#38242](https://github.com/ClickHouse/ClickHouse/issues/38242): Fix possible crash in `Distributed` async insert in case of removing a replica from config. [#38029](https://github.com/ClickHouse/ClickHouse/pull/38029) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#38865](https://github.com/ClickHouse/ClickHouse/issues/38865): Fix s3 seekable reads with parallel read buffer. (Affected memory usage during query). Closes [#38258](https://github.com/ClickHouse/ClickHouse/issues/38258). [#38802](https://github.com/ClickHouse/ClickHouse/pull/38802) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#38853](https://github.com/ClickHouse/ClickHouse/issues/38853): Update `simdjson`. This fixes [#38621](https://github.com/ClickHouse/ClickHouse/issues/38621). [#38838](https://github.com/ClickHouse/ClickHouse/pull/38838) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#38942](https://github.com/ClickHouse/ClickHouse/issues/38942): Fix settings profile with seconds unit. [#38896](https://github.com/ClickHouse/ClickHouse/pull/38896) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#39063](https://github.com/ClickHouse/ClickHouse/issues/39063): Any allocations inside OvercommitTracker may lead to deadlock. Logging was not very informative so it's easier just to remove logging. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794). [#39030](https://github.com/ClickHouse/ClickHouse/pull/39030) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#39077](https://github.com/ClickHouse/ClickHouse/issues/39077): Fix bug in filesystem cache that could happen in some corner case which coincided with cache capacity hitting the limit. Closes [#39066](https://github.com/ClickHouse/ClickHouse/issues/39066). [#39070](https://github.com/ClickHouse/ClickHouse/pull/39070) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#39151](https://github.com/ClickHouse/ClickHouse/issues/39151): Fix error `Block structure mismatch` which could happen for INSERT into table with attached MATERIALIZED VIEW and enabled setting `extremes = 1`. Closes [#29759](https://github.com/ClickHouse/ClickHouse/issues/29759) and [#38729](https://github.com/ClickHouse/ClickHouse/issues/38729). [#39125](https://github.com/ClickHouse/ClickHouse/pull/39125) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#39275](https://github.com/ClickHouse/ClickHouse/issues/39275): Fixed error `Not found column Type in block` in selects with `PREWHERE` and read-in-order optimizations. [#39157](https://github.com/ClickHouse/ClickHouse/pull/39157) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#39371](https://github.com/ClickHouse/ClickHouse/issues/39371): Declare RabbitMQ queue without default arguments `x-max-length` and `x-overflow`. [#39259](https://github.com/ClickHouse/ClickHouse/pull/39259) ([rnbondarenko](https://github.com/rnbondarenko)).
* Backported in [#39352](https://github.com/ClickHouse/ClickHouse/issues/39352): Fix incorrect 'fetch PostgreSQL tables' query for the PostgreSQL database engine. Closes [#33502](https://github.com/ClickHouse/ClickHouse/issues/33502). [#39283](https://github.com/ClickHouse/ClickHouse/pull/39283) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### NO CL CATEGORY
* Backported in [#38685](https://github.com/ClickHouse/ClickHouse/issues/38685). [#38449](https://github.com/ClickHouse/ClickHouse/pull/38449) ([Maksim Kita](https://github.com/kitaisreal)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Use native Map type for OpenTelemetry attributes [#38814](https://github.com/ClickHouse/ClickHouse/pull/38814) ([Ilya Yatsishin](https://github.com/qoega)).
* Retry docker buildx commands with progressive sleep in between [#38898](https://github.com/ClickHouse/ClickHouse/pull/38898) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add docker_server.py running to backport and release CIs [#39011](https://github.com/ClickHouse/ClickHouse/pull/39011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix meilisearch tests [#39110](https://github.com/ClickHouse/ClickHouse/pull/39110) ([Kseniia Sumarokova](https://github.com/kssenii)).

View File

@ -0,0 +1,468 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.7.1.2484-stable (f4f05ec786a) FIXME as compared to v22.6.1.1985-stable (7000c4e0033)
#### Backward Incompatible Change
* Enable setting `enable_positional_arguments` by default. It allows queries like `SELECT ... ORDER BY 1, 2` where 1, 2 are the references to the select clause. If you need to return the old behavior, disable this setting. [#38204](https://github.com/ClickHouse/ClickHouse/pull/38204) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* `Ordinary` database engine and old storage definition syntax for `*MergeTree` tables are deprecated. By default it's not possible to create new ones. If `system` database has `Ordinary` engine it will be automatically converted to `Atomic` on server startup. There are settings to keep old behavior (`allow_deprecated_database_ordinary` and `allow_deprecated_syntax_for_merge_tree`), but these settings may be removed in future releases. [#38335](https://github.com/ClickHouse/ClickHouse/pull/38335) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Force rewriting comma join to inner by default (set default value `cross_to_inner_join_rewrite = 2`). To have the old behavior set `cross_to_inner_join_rewrite = 1`. [#39326](https://github.com/ClickHouse/ClickHouse/pull/39326) ([Vladimir C](https://github.com/vdimir)).
* Disable format_csv_allow_single_quotes by default. [#37096](https://github.com/ClickHouse/ClickHouse/issues/37096). [#39423](https://github.com/ClickHouse/ClickHouse/pull/39423) ([Kruglov Pavel](https://github.com/Avogar)).
#### New Feature
* Add new `direct` join algorithm for RocksDB, ref [#33582](https://github.com/ClickHouse/ClickHouse/issues/33582). [#35363](https://github.com/ClickHouse/ClickHouse/pull/35363) ([Vladimir C](https://github.com/vdimir)).
* Added the full sorting merge join algorithm. [#35796](https://github.com/ClickHouse/ClickHouse/pull/35796) ([Vladimir C](https://github.com/vdimir)).
* Add a setting `zstd_window_log_max` to configure max memory usage on zstd decoding when importing external files. Closes [#35693](https://github.com/ClickHouse/ClickHouse/issues/35693). [#37015](https://github.com/ClickHouse/ClickHouse/pull/37015) ([wuxiaobai24](https://github.com/wuxiaobai24)).
* Implement NatsStorage, a table engine that allows publish/subscribe with NATS. Closes [#32388](https://github.com/ClickHouse/ClickHouse/issues/32388). [#37171](https://github.com/ClickHouse/ClickHouse/pull/37171) ([tchepavel](https://github.com/tchepavel)).
* Implement table function MongoDB. Allow writes into MongoDB storage / table function. [#37213](https://github.com/ClickHouse/ClickHouse/pull/37213) ([aaapetrenko](https://github.com/aaapetrenko)).
* `clickhouse-keeper` new feature: add support for real-time digest calculation and verification. [#37555](https://github.com/ClickHouse/ClickHouse/pull/37555) ([Antonio Andelic](https://github.com/antonio2368)).
* In [#17202](https://github.com/ClickHouse/ClickHouse/issues/17202) it was reported that host_regexp was being tested against only one of the possible PTR responses. This PR makes the necessary changes so that host_regexp is applied against all possible PTR responses and checks whether any of them matches. [#37827](https://github.com/ClickHouse/ClickHouse/pull/37827) ([Arthur Passos](https://github.com/arthurpassos)).
* Support Hadoop secure RPC transfer (hadoop.rpc.protection=privacy and hadoop.rpc.protection=integrity). [#37852](https://github.com/ClickHouse/ClickHouse/pull/37852) ([Peng Liu](https://github.com/michael1589)).
* Add struct type support in `StorageHive`. [#38118](https://github.com/ClickHouse/ClickHouse/pull/38118) ([lgbo](https://github.com/lgbo-ustc)).
* Added Base58 encoding/decoding. [#38159](https://github.com/ClickHouse/ClickHouse/pull/38159) ([Andrey Zvonov](https://github.com/zvonand)).
* Add chart visualization to Play UI. [#38197](https://github.com/ClickHouse/ClickHouse/pull/38197) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support the `ALTER` command on `StorageHive` tables. [#38214](https://github.com/ClickHouse/ClickHouse/pull/38214) ([lgbo](https://github.com/lgbo-ustc)).
* Added `CREATE TABLE ... EMPTY AS SELECT` query. It automatically deduces table structure from the SELECT query, but does not fill the table after creation. Resolves [#38049](https://github.com/ClickHouse/ClickHouse/issues/38049). [#38272](https://github.com/ClickHouse/ClickHouse/pull/38272) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Adds new setting `implicit_transaction` to run standalone queries inside a transaction. It handles both creation and closing (via COMMIT if the query succeeded or ROLLBACK if it didn't) of the transaction automatically. [#38344](https://github.com/ClickHouse/ClickHouse/pull/38344) ([Raúl Marín](https://github.com/Algunenano)).
* Allow trailing comma in columns list. Closes [#38425](https://github.com/ClickHouse/ClickHouse/issues/38425). [#38440](https://github.com/ClickHouse/ClickHouse/pull/38440) ([chen](https://github.com/xiedeyantu)).
* Compress clickhouse into a self-extracting executable (under programs/self-extracting). A new build target 'self-extracting' is added. [#38447](https://github.com/ClickHouse/ClickHouse/pull/38447) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Introduced setting `additional_table_filters`. Using this setting, you can specify an additional filtering condition for a table which will be applied directly after reading. Example: `select number, x, y from (select number from system.numbers limit 5) f any left join (select x, y from table_1) s on f.number = s.x settings additional_table_filters={'system.numbers' : 'number != 3', 'table_1' : 'x != 2'}`. Also introduced setting `additional_result_filter` which specifies an additional filtering condition for the query result. Closes [#37918](https://github.com/ClickHouse/ClickHouse/issues/37918). [#38475](https://github.com/ClickHouse/ClickHouse/pull/38475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Add SQLInsert output format. Closes [#38441](https://github.com/ClickHouse/ClickHouse/issues/38441). [#38477](https://github.com/ClickHouse/ClickHouse/pull/38477) ([Kruglov Pavel](https://github.com/Avogar)).
* The downloadable clickhouse executable is a compressed self-extracting binary. [#38653](https://github.com/ClickHouse/ClickHouse/pull/38653) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Support `isNullable` function. This function checks whether its argument is nullable and returns true(1) or false(0). Closes [#38611](https://github.com/ClickHouse/ClickHouse/issues/38611). [#38841](https://github.com/ClickHouse/ClickHouse/pull/38841) ([lokax](https://github.com/lokax)).
* Add functions `translate(string, from_string, to_string)` and `translateUTF8(string, from_string, to_string)`. [#38935](https://github.com/ClickHouse/ClickHouse/pull/38935) ([Nikolay Degterinsky](https://github.com/evillique)).
* Add `compatibility` setting and `system.settings_changes` system table that contains information about changes in settings through ClickHouse versions. Closes [#35972](https://github.com/ClickHouse/ClickHouse/issues/35972). [#38957](https://github.com/ClickHouse/ClickHouse/pull/38957) ([Kruglov Pavel](https://github.com/Avogar)).
* Add the 3rd parameter to the tupleElement function and return it if tuple doesn't have a member. Only works if the 2nd parameter is of type String. Closes [#38872](https://github.com/ClickHouse/ClickHouse/issues/38872). [#38989](https://github.com/ClickHouse/ClickHouse/pull/38989) ([lokax](https://github.com/lokax)).
* Support the `parseTimeDelta` function. The characters ` `, `;`, `-`, `+`, `,`, `:` can be used as separators, e.g. `1yr-2mo`, `2m:6s`: `SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ; 33 seconds')`. [#39071](https://github.com/ClickHouse/ClickHouse/pull/39071) ([jiahui-97](https://github.com/jiahui-97)).
* Added options to limit IO operations with remote storage: `max_remote_read_network_bandwidth_for_server` and `max_remote_write_network_bandwidth_for_server`. [#39095](https://github.com/ClickHouse/ClickHouse/pull/39095) ([Sergei Trifonov](https://github.com/serxa)).
* Add `send_logs_source_regexp` setting. Send server text logs with specified regexp to match log source name. Empty means all sources. [#39161](https://github.com/ClickHouse/ClickHouse/pull/39161) ([Amos Bird](https://github.com/amosbird)).
* OpenTelemetry now collects traces without Processors spans by default. To enable collection of Processors spans, use the `opentelemetry_trace_processors` setting. [#39170](https://github.com/ClickHouse/ClickHouse/pull/39170) ([Ilya Yatsishin](https://github.com/qoega)).
#### Performance Improvement
* Add new `local_filesystem_read_method` method `io_uring` based on the asynchronous Linux [io_uring](https://kernel.dk/io_uring.pdf) subsystem, improving read performance almost universally compared to the default `pread` method. [#36103](https://github.com/ClickHouse/ClickHouse/pull/36103) ([Saulius Valatka](https://github.com/sauliusvl)).
* Distinct optimization for sorted columns. Use specialized distinct transformation in case input stream is sorted by column(s) in distinct. Optimization can be applied to pre-distinct, final distinct, or both. Initial implementation by @dimarub2000. [#37803](https://github.com/ClickHouse/ClickHouse/pull/37803) ([Igor Nikonov](https://github.com/devcrafter)).
* Add VBMI optimized copyOverlap32Shuffle for LZ4 decompress. [#37891](https://github.com/ClickHouse/ClickHouse/pull/37891) ([Guo Wangyang](https://github.com/guowangy)).
* Improve performance of `ORDER BY`, `MergeTree` merges, window functions using batch version of `BinaryHeap`. [#38022](https://github.com/ClickHouse/ClickHouse/pull/38022) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix significant join performance regression which was introduced in https://github.com/ClickHouse/ClickHouse/pull/35616. It's interesting that common join queries such as the SSB queries had been 10 times slower for almost 3 months while no one complained. [#38052](https://github.com/ClickHouse/ClickHouse/pull/38052) ([Amos Bird](https://github.com/amosbird)).
* Migrate from the Intel hyperscan library to vectorscan, this speeds up many string matching on non-x86 platforms. [#38171](https://github.com/ClickHouse/ClickHouse/pull/38171) ([Robert Schulze](https://github.com/rschu1ze)).
* Increased parallelism of query plan steps executed after aggregation. [#38295](https://github.com/ClickHouse/ClickHouse/pull/38295) ([Nikita Taranov](https://github.com/nickitat)).
* Improve performance of insertion to columns of type `JSON`. [#38320](https://github.com/ClickHouse/ClickHouse/pull/38320) ([Anton Popov](https://github.com/CurtizJ)).
* Optimized insertion and lookups in the HashTable. [#38413](https://github.com/ClickHouse/ClickHouse/pull/38413) ([Nikita Taranov](https://github.com/nickitat)).
* Fix performance degradation from [#32493](https://github.com/ClickHouse/ClickHouse/issues/32493). [#38417](https://github.com/ClickHouse/ClickHouse/pull/38417) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve performance of column vector replicate using SIMD instructions. Author @zzachimed. [#38565](https://github.com/ClickHouse/ClickHouse/pull/38565) ([Maksim Kita](https://github.com/kitaisreal)).
* Norm and Distance functions for arrays speed up 1.2-2 times. [#38740](https://github.com/ClickHouse/ClickHouse/pull/38740) ([Alexander Gololobov](https://github.com/davenger)).
* Previously, a less efficient execution plan could be generated for a query with `ORDER BY (a, b)` than for `ORDER BY a, b`; now both forms are optimized the same way. [#38873](https://github.com/ClickHouse/ClickHouse/pull/38873) ([Igor Nikonov](https://github.com/devcrafter)).
* Executable UDFs, executable dictionaries, and Executable tables no longer waste one second waiting for subprocess termination. [#38929](https://github.com/ClickHouse/ClickHouse/pull/38929) ([Constantine Peresypkin](https://github.com/pkit)).
* Push down filters to the right side of a sorting merge join. [#39123](https://github.com/ClickHouse/ClickHouse/pull/39123) ([Vladimir C](https://github.com/vdimir)).
* Optimize accesses to system.stack_trace. [#39177](https://github.com/ClickHouse/ClickHouse/pull/39177) ([Azat Khuzhin](https://github.com/azat)).
#### Improvement
* Optimized processing of ORDER BY in window functions. [#34632](https://github.com/ClickHouse/ClickHouse/pull/34632) ([Vladimir Chebotarev](https://github.com/excitoon)).
* Support SQL standard create index and drop index syntax. [#35166](https://github.com/ClickHouse/ClickHouse/pull/35166) ([Jianmei Zhang](https://github.com/zhangjmruc)).
* Use SIMD to rewrite the current column replicate function, yielding a 2x performance boost in our unit benchmark test. [#37235](https://github.com/ClickHouse/ClickHouse/pull/37235) ([zzachimed](https://github.com/zzachimed)).
* Send profile events for INSERT queries (previously only SELECT was supported). [#37391](https://github.com/ClickHouse/ClickHouse/pull/37391) ([Azat Khuzhin](https://github.com/azat)).
* Implement in order aggregation (`optimize_aggregation_in_order`) for fully materialized projections. [#37469](https://github.com/ClickHouse/ClickHouse/pull/37469) ([Azat Khuzhin](https://github.com/azat)).
* Bugfixes and performance improvements for `parallel_hash`. [#37648](https://github.com/ClickHouse/ClickHouse/pull/37648) ([Vladimir C](https://github.com/vdimir)).
* Support expressions with window functions. Closes [#19857](https://github.com/ClickHouse/ClickHouse/issues/19857). [#37848](https://github.com/ClickHouse/ClickHouse/pull/37848) ([Dmitry Novik](https://github.com/novikd)).
* S3 single objects are now removed with `RemoveObjectRequest` (sic). Fixed a bug with `S3ObjectStorage` on GCP which did not allow using `removeFileIfExists`, effectively breaking approximately half of the `remove` functionality. Added automatic detection for the `DeleteObjects` S3 API, which is not supported by GCS. This allows using GCS without an explicit `support_batch_delete=0` in the configuration. [#37882](https://github.com/ClickHouse/ClickHouse/pull/37882) ([Vladimir Chebotarev](https://github.com/excitoon)).
* Fix refcnt for unused MergeTree parts in SELECT queries (may defer parts removal). [#37913](https://github.com/ClickHouse/ClickHouse/pull/37913) ([Azat Khuzhin](https://github.com/azat)).
* Expose basic Keeper related monitoring data (via ProfileEvents and CurrentMetrics). [#38072](https://github.com/ClickHouse/ClickHouse/pull/38072) ([lingpeng0314](https://github.com/lingpeng0314)).
* Added the kerberosInit function and a corresponding KerberosInit class as a replacement for the kinit executable. Replaced all calls of kinit in the Kafka and HDFS code with calls to the kerberosInit function. Added a new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)).
* Add setting `multiple_joins_try_to_keep_original_names` to not rewrite identifier names on multiple-JOIN rewrite, close [#34697](https://github.com/ClickHouse/ClickHouse/issues/34697). [#38149](https://github.com/ClickHouse/ClickHouse/pull/38149) ([Vladimir C](https://github.com/vdimir)).
* Improved trace-visualizer UX. [#38169](https://github.com/ClickHouse/ClickHouse/pull/38169) ([Sergei Trifonov](https://github.com/serxa)).
* Add ability to pass headers to the `url` table function / storage via SQL. Closes [#37897](https://github.com/ClickHouse/ClickHouse/issues/37897). [#38176](https://github.com/ClickHouse/ClickHouse/pull/38176) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Enable trace collection for AArch64. [#38181](https://github.com/ClickHouse/ClickHouse/pull/38181) ([Maksim Kita](https://github.com/kitaisreal)).
* Do not skip symlinks in `user_defined` directory during SQL user defined functions loading. Closes [#38042](https://github.com/ClickHouse/ClickHouse/issues/38042). [#38184](https://github.com/ClickHouse/ClickHouse/pull/38184) ([Maksim Kita](https://github.com/kitaisreal)).
* Improve the stability for hive storage integration test. Move the data prepare step into test.py. [#38260](https://github.com/ClickHouse/ClickHouse/pull/38260) ([lgbo](https://github.com/lgbo-ustc)).
* Added background cleanup of subdirectories in `store/`. In some cases clickhouse-server might leave garbage subdirectories in `store/` (for example, on unsuccessful table creation), and those dirs were never removed. Fixes [#33710](https://github.com/ClickHouse/ClickHouse/issues/33710). [#38265](https://github.com/ClickHouse/ClickHouse/pull/38265) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add `DESCRIBE CACHE` query to show cache settings from config. Add `SHOW CACHES` query to show available filesystem caches list. [#38279](https://github.com/ClickHouse/ClickHouse/pull/38279) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add access check for system drop fs cache. Support ON CLUSTER. [#38319](https://github.com/ClickHouse/ClickHouse/pull/38319) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Support `auto_close` option for postgres engine connection. Closes [#31486](https://github.com/ClickHouse/ClickHouse/issues/31486). [#38363](https://github.com/ClickHouse/ClickHouse/pull/38363) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix PostgreSQL database engine incompatibility on upgrade from 21.3 to 22.3. Closes [#36659](https://github.com/ClickHouse/ClickHouse/issues/36659). [#38369](https://github.com/ClickHouse/ClickHouse/pull/38369) ([Kseniia Sumarokova](https://github.com/kssenii)).
* `filesystemAvailable` and similar functions now work in `clickhouse-local`. This closes [#38423](https://github.com/ClickHouse/ClickHouse/issues/38423). [#38424](https://github.com/ClickHouse/ClickHouse/pull/38424) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Hardware benchmark now has support for automatic results uploading. [#38427](https://github.com/ClickHouse/ClickHouse/pull/38427) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The table `system.asynchronous_metric_log` is further optimized for storage space. This closes [#38134](https://github.com/ClickHouse/ClickHouse/issues/38134). See the [YouTube video](https://www.youtube.com/watch?v=0fSp9SF8N8A). [#38428](https://github.com/ClickHouse/ClickHouse/pull/38428) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Functions multiMatchAny(), multiMatchAnyIndex(), multiMatchAllIndices() and their fuzzy variants now accept non-const pattern array argument. [#38485](https://github.com/ClickHouse/ClickHouse/pull/38485) ([Robert Schulze](https://github.com/rschu1ze)).
* Added L2 Squared distance and norm for both arrays and tuples. [#38545](https://github.com/ClickHouse/ClickHouse/pull/38545) ([Julian Gilyadov](https://github.com/israelg99)).
* Add revision() function. [#38555](https://github.com/ClickHouse/ClickHouse/pull/38555) ([Azat Khuzhin](https://github.com/azat)).
* Add `group_by_use_nulls` setting to make aggregation key columns nullable in the case of ROLLUP, CUBE and GROUPING SETS. Closes [#37359](https://github.com/ClickHouse/ClickHouse/issues/37359). [#38642](https://github.com/ClickHouse/ClickHouse/pull/38642) ([Dmitry Novik](https://github.com/novikd)).
* Fix GCS via proxy tunnel usage. [#38726](https://github.com/ClickHouse/ClickHouse/pull/38726) ([Azat Khuzhin](https://github.com/azat)).
* Support `\i file` in clickhouse client / local (similar to psql \i). [#38813](https://github.com/ClickHouse/ClickHouse/pull/38813) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Allow null modifier in columns declaration for table functions. [#38816](https://github.com/ClickHouse/ClickHouse/pull/38816) ([Kruglov Pavel](https://github.com/Avogar)).
* Deactivate `mutations_finalizing_task` before shutdown to avoid `TABLE_IS_READ_ONLY` errors. [#38851](https://github.com/ClickHouse/ClickHouse/pull/38851) ([Raúl Marín](https://github.com/Algunenano)).
* Fix waiting of shared lock after exclusive lock failure. [#38864](https://github.com/ClickHouse/ClickHouse/pull/38864) ([Azat Khuzhin](https://github.com/azat)).
* Add the ability to specify compression level during data export. [#38907](https://github.com/ClickHouse/ClickHouse/pull/38907) ([Nikolay Degterinsky](https://github.com/evillique)).
* New option `rewrite` in `EXPLAIN AST`. If enabled, it shows AST after it's rewritten, otherwise AST of original query. Disabled by default. [#38910](https://github.com/ClickHouse/ClickHouse/pull/38910) ([Igor Nikonov](https://github.com/devcrafter)).
* - Stop reporting Zookeeper "Node exists" exceptions in system.errors when they are expected. [#38961](https://github.com/ClickHouse/ClickHouse/pull/38961) ([Raúl Marín](https://github.com/Algunenano)).
* Allow specifying globs (`*` or `{expr1, expr2, expr3}`) inside a key for the `clickhouse-extract-from-config` tool. [#38966](https://github.com/ClickHouse/ClickHouse/pull/38966) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Add an option to require a grant for `SELECT` from the `system` database. [#38970](https://github.com/ClickHouse/ClickHouse/pull/38970) ([Vitaly Baranov](https://github.com/vitlibar)).
* clearOldLogs: Don't report KEEPER_EXCEPTION on concurrent deletes. [#39016](https://github.com/ClickHouse/ClickHouse/pull/39016) ([Raúl Marín](https://github.com/Algunenano)).
* `clickhouse-keeper` improvement: persist meta-information about Keeper servers to disk. [#39069](https://github.com/ClickHouse/ClickHouse/pull/39069) ([Antonio Andelic](https://github.com/antonio2368)).
* Continue without an exception when running out of disk space while using the filesystem cache. [#39106](https://github.com/ClickHouse/ClickHouse/pull/39106) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Handle SIGTERM signals from Kubernetes (k8s). [#39130](https://github.com/ClickHouse/ClickHouse/pull/39130) ([Timur Solodovnikov](https://github.com/tsolodov)).
* SQL function multiStringAllPositions() now accepts non-const needle arguments (see the sketch after this list). [#39167](https://github.com/ClickHouse/ClickHouse/pull/39167) ([Robert Schulze](https://github.com/rschu1ze)).
* Add `merge_algorithm` (Undecided, Horizontal, Vertical) to `system.part_log` (example after this list). [#39181](https://github.com/ClickHouse/ClickHouse/pull/39181) ([Azat Khuzhin](https://github.com/azat)).
* Improve isNullable/isConstant/isNull/isNotNull performance for LowCardinality argument. [#39192](https://github.com/ClickHouse/ClickHouse/pull/39192) ([Kruglov Pavel](https://github.com/Avogar)).
* Don't report system.errors when the disk is not rotational. [#39216](https://github.com/ClickHouse/ClickHouse/pull/39216) ([Raúl Marín](https://github.com/Algunenano)).
* Metric `result_bytes` for `INSERT` queries in `system.query_log` shows the number of bytes inserted. Previously the value was incorrect and stored the same value as `result_rows` (example after this list). [#39225](https://github.com/ClickHouse/ClickHouse/pull/39225) ([Ilya Yatsishin](https://github.com/qoega)).
* The CPU usage metric in clickhouse-client will be displayed in a better way. Fixes [#38756](https://github.com/ClickHouse/ClickHouse/issues/38756). [#39280](https://github.com/ClickHouse/ClickHouse/pull/39280) ([Sergei Trifonov](https://github.com/serxa)).
* Rethrow the exception on filesystem cache initialisation failure at server startup, with a better error message. [#39386](https://github.com/ClickHouse/ClickHouse/pull/39386) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Support milliseconds, microseconds and nanoseconds in the `parseTimeDelta` function (example after this list). [#39447](https://github.com/ClickHouse/ClickHouse/pull/39447) ([Kruglov Pavel](https://github.com/Avogar)).
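A minimal sketch of the non-const needle support mentioned in the multiMatch*() and multiStringAllPositions() entries above (table and column names are hypothetical):

```sql
-- Needles/patterns may now come from a column rather than a constant literal.
CREATE TABLE logs (line String, patterns Array(String)) ENGINE = Memory;  -- hypothetical table
INSERT INTO logs VALUES ('error: disk full', ['error', 'fatal']), ('all good', ['error', 'fatal']);

SELECT line, multiMatchAny(line, patterns) AS matched FROM logs;

-- multiStringAllPositions() likewise accepts a non-const needle array:
SELECT line, multiStringAllPositions(line, patterns) AS positions FROM logs;
```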
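A quick illustration of the new squared L2 functions, assuming the `L2SquaredDistance`/`L2SquaredNorm` names:

```sql
SELECT
    L2SquaredDistance([1, 2, 3], [4, 5, 6]) AS dist_sq, -- (4-1)^2 + (5-2)^2 + (6-3)^2 = 27
    L2SquaredNorm((3, 4)) AS norm_sq;                   -- 3^2 + 4^2 = 25
```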
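A sketch of `group_by_use_nulls`: with the setting enabled, the aggregation key column becomes Nullable, and the ROLLUP total row carries `NULL` instead of the type's default value:

```sql
SELECT number % 2 AS parity, count() AS c
FROM numbers(10)
GROUP BY parity WITH ROLLUP
SETTINGS group_by_use_nulls = 1;
-- the grand-total row now has parity = NULL rather than 0
```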
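Usage of the new `\i` command in an interactive clickhouse-client / clickhouse-local session (the file path is hypothetical):

```sql
-- Executes all queries from the given file, analogous to psql's \i:
\i /tmp/setup.sql
```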
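The `NULL` / `NOT NULL` modifiers in the structure argument of a table function, as a minimal sketch (the file name is hypothetical):

```sql
-- 'name' becomes Nullable(String); 'id' stays non-nullable.
SELECT * FROM file('data.tsv', 'TSV', 'id UInt32 NOT NULL, name String NULL');
```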
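A sketch of specifying the compression level on export, assuming the `COMPRESSION type LEVEL n` clause of `INTO OUTFILE` (the output path is hypothetical):

```sql
SELECT number FROM numbers(1000000)
INTO OUTFILE '/tmp/numbers.csv.gz'
COMPRESSION 'gzip' LEVEL 3
FORMAT CSV;
```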
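Sub-second units in `parseTimeDelta`, assuming the short unit names `ms`/`us`/`ns`; the result is a number of seconds:

```sql
SELECT
    parseTimeDelta('1s+500ms') AS sec,  -- 1.5
    parseTimeDelta('250us') AS micro;   -- 0.00025
```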
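Inspecting the new columns from the `system.part_log` and `system.query_log` entries above, as a sketch:

```sql
SELECT event_type, merge_algorithm
FROM system.part_log
WHERE event_type = 'MergeParts'
LIMIT 5;

SELECT query_kind, result_rows, result_bytes
FROM system.query_log
WHERE type = 'QueryFinish' AND query_kind = 'Insert'
LIMIT 5;
```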
#### Bug Fix
* Fix crash when executing GRANT ALL ON *.* with ON CLUSTER. It was broken in https://github.com/ClickHouse/ClickHouse/pull/35767. This closes [#38618](https://github.com/ClickHouse/ClickHouse/issues/38618). [#38674](https://github.com/ClickHouse/ClickHouse/pull/38674) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fixed a crash caused by `IHiveFile` being shared among threads. [#38887](https://github.com/ClickHouse/ClickHouse/pull/38887) ([lgbo](https://github.com/lgbo-ustc)).
#### Build/Testing/Packaging Improvement
* Apply Clang Thread Safety Analysis (TSA) annotations to ClickHouse. [#38068](https://github.com/ClickHouse/ClickHouse/pull/38068) ([Robert Schulze](https://github.com/rschu1ze)).
* System table "system.licenses" is now correctly populated on Mac (Darwin). [#38294](https://github.com/ClickHouse/ClickHouse/pull/38294) ([Robert Schulze](https://github.com/rschu1ze)).
* Handle the full-queue exception in clickhouse-test. If it happens, we need to collect debug info to understand which queries didn't finish. [#38490](https://github.com/ClickHouse/ClickHouse/pull/38490) ([Dmitry Novik](https://github.com/novikd)).
* Change `all|noarch` packages to architecture-dependent ones, fix some documentation for it, and push `aarch64|arm64` packages to Artifactory and release assets. Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add `clickhouse-diagnostics` binary to the packages. [#38647](https://github.com/ClickHouse/ClickHouse/pull/38647) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Always print stacktraces if test queue is full. Follow up [#38490](https://github.com/ClickHouse/ClickHouse/issues/38490) cc @tavplubix. [#38662](https://github.com/ClickHouse/ClickHouse/pull/38662) ([Dmitry Novik](https://github.com/novikd)).
* Align branches within a 32B boundary to make benchmark more stable. [#38988](https://github.com/ClickHouse/ClickHouse/pull/38988) ([Guo Wangyang](https://github.com/guowangy)).
* Fix LSan by fixing getauxval(). [#39299](https://github.com/ClickHouse/ClickHouse/pull/39299) ([Azat Khuzhin](https://github.com/azat)).
* Adapt universal installation script for FreeBSD. [#39302](https://github.com/ClickHouse/ClickHouse/pull/39302) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Fix projection exception when aggregation keys are wrapped inside other functions. This fixes [#37151](https://github.com/ClickHouse/ClickHouse/issues/37151). [#37155](https://github.com/ClickHouse/ClickHouse/pull/37155) ([Amos Bird](https://github.com/amosbird)).
* Fix possible logical error `... with argument with type Nothing and default implementation for Nothing is expected to return result with type Nothing, got ...` in some functions. Closes: [#37610](https://github.com/ClickHouse/ClickHouse/issues/37610) Closes: [#37741](https://github.com/ClickHouse/ClickHouse/issues/37741). [#37759](https://github.com/ClickHouse/ClickHouse/pull/37759) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix incorrect columns order in subqueries of UNION (in case of duplicated columns in subselects may produce incorrect result). [#37887](https://github.com/ClickHouse/ClickHouse/pull/37887) ([Azat Khuzhin](https://github.com/azat)).
* Fix incorrect behavior of `ALTER ... MODIFY COLUMN` with column names that contain dots. Closes [#37907](https://github.com/ClickHouse/ClickHouse/issues/37907). [#37971](https://github.com/ClickHouse/ClickHouse/pull/37971) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix reading of sparse columns from `MergeTree` tables that store their data in S3. [#37978](https://github.com/ClickHouse/ClickHouse/pull/37978) ([Anton Popov](https://github.com/CurtizJ)).
* Fix rounding for `Decimal128/Decimal256` with more than 19-digits long scale. [#38027](https://github.com/ClickHouse/ClickHouse/pull/38027) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix possible crash in `Distributed` async insert in case of removing a replica from config. [#38029](https://github.com/ClickHouse/ClickHouse/pull/38029) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix "Missing columns" for GLOBAL JOIN with CTE w/o alias. [#38056](https://github.com/ClickHouse/ClickHouse/pull/38056) ([Azat Khuzhin](https://github.com/azat)).
* Rewrite tuple functions as literals in backwards-compatibility mode. [#38096](https://github.com/ClickHouse/ClickHouse/pull/38096) ([Anton Kozlov](https://github.com/tonickkozlov)).
* Fix redundant memory reservation for output block during `ORDER BY`. [#38127](https://github.com/ClickHouse/ClickHouse/pull/38127) ([iyupeng](https://github.com/iyupeng)).
* Fix possible logical error `Bad cast from type DB::IColumn* to DB::ColumnNullable*` in array mapped functions. Closes [#38006](https://github.com/ClickHouse/ClickHouse/issues/38006). [#38132](https://github.com/ClickHouse/ClickHouse/pull/38132) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix temporary name clash in partial merge join, closes [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#38135](https://github.com/ClickHouse/ClickHouse/pull/38135) ([Vladimir C](https://github.com/vdimir)).
* Fix handling of tables with nested named tuples, such as `CREATE TABLE nested_name_tuples (a Tuple(x String, y Tuple(i Int32, j String))) ENGINE = Memory`. [#38136](https://github.com/ClickHouse/ClickHouse/pull/38136) ([lgbo](https://github.com/lgbo-ustc)).
* Fix bug with nested short-circuit functions that led to execution of arguments even if condition is false. Closes [#38040](https://github.com/ClickHouse/ClickHouse/issues/38040). [#38173](https://github.com/ClickHouse/ClickHouse/pull/38173) ([Kruglov Pavel](https://github.com/Avogar)).
* (Window View is an experimental feature) Fix LOGICAL_ERROR for WINDOW VIEW with incorrect structure. [#38205](https://github.com/ClickHouse/ClickHouse/pull/38205) ([Azat Khuzhin](https://github.com/azat)).
* Update librdkafka submodule to fix crash when an OAUTHBEARER refresh callback is set. [#38225](https://github.com/ClickHouse/ClickHouse/pull/38225) ([Rafael Acevedo](https://github.com/racevedoo)).
* Do not allow recursive usage of OvercommitTracker during logging. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794) cc @tavplubix @davenger. [#38246](https://github.com/ClickHouse/ClickHouse/pull/38246) ([Dmitry Novik](https://github.com/novikd)).
* Fix INSERT into Distributed tables hanging due to ProfileEvents. [#38307](https://github.com/ClickHouse/ClickHouse/pull/38307) ([Azat Khuzhin](https://github.com/azat)).
* Fix retries in PostgreSQL engine. [#38310](https://github.com/ClickHouse/ClickHouse/pull/38310) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix optimization in PartialSortingTransform (SIGSEGV and possible incorrect result). [#38324](https://github.com/ClickHouse/ClickHouse/pull/38324) ([Azat Khuzhin](https://github.com/azat)).
* Fix RabbitMQ with formats based on PeekableReadBuffer. Closes [#38061](https://github.com/ClickHouse/ClickHouse/issues/38061). [#38356](https://github.com/ClickHouse/ClickHouse/pull/38356) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix possible `Invalid number of rows in Chunk` in MaterializedPostgreSQL. Closes [#37323](https://github.com/ClickHouse/ClickHouse/issues/37323). [#38360](https://github.com/ClickHouse/ClickHouse/pull/38360) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix RabbitMQ configuration with connection string setting. Closes [#36531](https://github.com/ClickHouse/ClickHouse/issues/36531). [#38365](https://github.com/ClickHouse/ClickHouse/pull/38365) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix PostgreSQL engine not using PostgreSQL schema when retrieving array dimension size. Closes [#36755](https://github.com/ClickHouse/ClickHouse/issues/36755). Closes [#36772](https://github.com/ClickHouse/ClickHouse/issues/36772). [#38366](https://github.com/ClickHouse/ClickHouse/pull/38366) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix incorrect result of distributed queries with `DISTINCT` and `LIMIT`. Fixes [#38282](https://github.com/ClickHouse/ClickHouse/issues/38282). [#38371](https://github.com/ClickHouse/ClickHouse/pull/38371) ([Anton Popov](https://github.com/CurtizJ)).
* Expose the new ClickHouse Keeper port in the `clickhouse/clickhouse-keeper` Dockerfile, and use the correct `KEEPER_CONFIG` filename in the `clickhouse/clickhouse-keeper` docker image. [#38462](https://github.com/ClickHouse/ClickHouse/pull/38462) ([Evgeny Kruglov](https://github.com/nordluf)).
* Fix parts removal after an incorrect server shutdown (parts that had not been removed by shutdown time would otherwise be left forever). [#38486](https://github.com/ClickHouse/ClickHouse/pull/38486) ([Azat Khuzhin](https://github.com/azat)).
* Fixes [#38498](https://github.com/ClickHouse/ClickHouse/issues/38498). The current implementation is similar to what a shell does, as mentioned by @rschu1ze [here](https://github.com/ClickHouse/ClickHouse/pull/38502#issuecomment-1169057723). [#38502](https://github.com/ClickHouse/ClickHouse/pull/38502) ([Heena Bansal](https://github.com/HeenaBansal2009)).
* Fix table creation to avoid replication issues with pre-22.4 replicas. [#38541](https://github.com/ClickHouse/ClickHouse/pull/38541) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash for `mapUpdate`, `mapFilter` functions when used with a constant map argument (see the sketch after this list). Closes [#38547](https://github.com/ClickHouse/ClickHouse/issues/38547). [#38553](https://github.com/ClickHouse/ClickHouse/pull/38553) ([hexiaoting](https://github.com/hexiaoting)).
* Fix wrong results of countSubstrings() & position() on patterns with 0-bytes. [#38589](https://github.com/ClickHouse/ClickHouse/pull/38589) ([Robert Schulze](https://github.com/rschu1ze)).
* Now it's possible to start a clickhouse-server and attach/detach tables even for tables with incorrect values in the IPv4/IPv6 representation. Proper fix for issue [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#38590](https://github.com/ClickHouse/ClickHouse/pull/38590) ([alesapin](https://github.com/alesapin)).
* Adapt some more nodes to avoid issues with pre-22.4 replicas. [#38627](https://github.com/ClickHouse/ClickHouse/pull/38627) ([Raúl Marín](https://github.com/Algunenano)).
* Fix toHour() monotonicity which can lead to incorrect query result (incorrect index analysis). This fixes [#38333](https://github.com/ClickHouse/ClickHouse/issues/38333). [#38675](https://github.com/ClickHouse/ClickHouse/pull/38675) ([Amos Bird](https://github.com/amosbird)).
* `rankCorr` function will work correctly if some arguments are NaNs (example after this list). This closes [#38396](https://github.com/ClickHouse/ClickHouse/issues/38396). [#38722](https://github.com/ClickHouse/ClickHouse/pull/38722) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `parallel_view_processing=1` with `optimize_trivial_insert_select=1`. Fix `max_insert_threads` while pushing to views. [#38731](https://github.com/ClickHouse/ClickHouse/pull/38731) ([Azat Khuzhin](https://github.com/azat)).
* Fix use-after-free for Map combinator that leads to incorrect result. [#38748](https://github.com/ClickHouse/ClickHouse/pull/38748) ([Azat Khuzhin](https://github.com/azat)).
* Fix throwing an exception for a seekable read from s3 (previously the exception was not thrown). [#38773](https://github.com/ClickHouse/ClickHouse/pull/38773) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix checking whether s3 storage supports parallel writes. It resulted in s3 parallel writes not working. [#38792](https://github.com/ClickHouse/ClickHouse/pull/38792) ([chen](https://github.com/xiedeyantu)).
* Fix s3 seekable reads with parallel read buffer (affected memory usage during queries). Closes [#38258](https://github.com/ClickHouse/ClickHouse/issues/38258). [#38802](https://github.com/ClickHouse/ClickHouse/pull/38802) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Update `simdjson`. This fixes [#38621](https://github.com/ClickHouse/ClickHouse/issues/38621). [#38838](https://github.com/ClickHouse/ClickHouse/pull/38838) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix a possible logical error in MergeTree for Vertical merges. [#38859](https://github.com/ClickHouse/ClickHouse/pull/38859) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix settings profile with seconds unit. [#38896](https://github.com/ClickHouse/ClickHouse/pull/38896) ([Raúl Marín](https://github.com/Algunenano)).
* Fix incorrect partition pruning when there is a nullable partition. This fixes [#38941](https://github.com/ClickHouse/ClickHouse/issues/38941). [#38946](https://github.com/ClickHouse/ClickHouse/pull/38946) ([Amos Bird](https://github.com/amosbird)).
* Fix fsync_part_directory for fetches. [#38993](https://github.com/ClickHouse/ClickHouse/pull/38993) ([Azat Khuzhin](https://github.com/azat)).
* Functions multiMatch[Fuzzy](AllIndices/Any/AnyIndex)() no longer throw a logical error if the needle argument is empty. [#39012](https://github.com/ClickHouse/ClickHouse/pull/39012) ([Robert Schulze](https://github.com/rschu1ze)).
* Any allocations inside OvercommitTracker may lead to deadlock. Logging was not very informative so it's easier just to remove logging. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794). [#39030](https://github.com/ClickHouse/ClickHouse/pull/39030) ([Dmitry Novik](https://github.com/novikd)).
* Fix toHour() monotonicity which can lead to incorrect query result (incorrect index analysis). This fixes [#38333](https://github.com/ClickHouse/ClickHouse/issues/38333). [#39037](https://github.com/ClickHouse/ClickHouse/pull/39037) ([Amos Bird](https://github.com/amosbird)).
* Fix a bug in the filesystem cache that could happen in a rare corner case when cache capacity hit the limit. Closes [#39066](https://github.com/ClickHouse/ClickHouse/issues/39066). [#39070](https://github.com/ClickHouse/ClickHouse/pull/39070) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix ActionsDAG construction for arguments of window expressions. Fixes [#38538](https://github.com/ClickHouse/ClickHouse/issues/38538) Allow using of higher-order functions in window expressions. [#39112](https://github.com/ClickHouse/ClickHouse/pull/39112) ([Dmitry Novik](https://github.com/novikd)).
* Keep `LowCardinality` type in `tuple()` function. Previously the `LowCardinality` type was dropped and elements of the created tuple had the underlying type of `LowCardinality` (example after this list). [#39113](https://github.com/ClickHouse/ClickHouse/pull/39113) ([Anton Popov](https://github.com/CurtizJ)).
* Fix error `Block structure mismatch` which could happen for INSERT into table with attached MATERIALIZED VIEW and enabled setting `extremes = 1`. Closes [#29759](https://github.com/ClickHouse/ClickHouse/issues/29759) and [#38729](https://github.com/ClickHouse/ClickHouse/issues/38729). [#39125](https://github.com/ClickHouse/ClickHouse/pull/39125) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix unexpected query result when both `optimize_trivial_count_query` and `empty_result_for_aggregation_by_empty_set` are set to true. This fixes [#39140](https://github.com/ClickHouse/ClickHouse/issues/39140). [#39155](https://github.com/ClickHouse/ClickHouse/pull/39155) ([Amos Bird](https://github.com/amosbird)).
* Fixed error `Not found column Type in block` in selects with `PREWHERE` and read-in-order optimizations. [#39157](https://github.com/ClickHouse/ClickHouse/pull/39157) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix an extremely rare race condition during hardlink creation for remote filesystems. The only way to reproduce it is a concurrent run of backups. [#39190](https://github.com/ClickHouse/ClickHouse/pull/39190) ([alesapin](https://github.com/alesapin)).
* Fix fetch of in-memory part with `allow_remote_fs_zero_copy_replication`. [#39214](https://github.com/ClickHouse/ClickHouse/pull/39214) ([Azat Khuzhin](https://github.com/azat)).
* Fix NOEXCEPT_SCOPE (previously it called std::terminate and lost the exception). [#39229](https://github.com/ClickHouse/ClickHouse/pull/39229) ([Azat Khuzhin](https://github.com/azat)).
* Declare RabbitMQ queue without default arguments `x-max-length` and `x-overflow`. [#39259](https://github.com/ClickHouse/ClickHouse/pull/39259) ([rnbondarenko](https://github.com/rnbondarenko)).
* Fix segmentation fault in MaterializedPostgreSQL database engine, which could happen if some exception occurred at replication initialisation. Closes [#36939](https://github.com/ClickHouse/ClickHouse/issues/36939). [#39272](https://github.com/ClickHouse/ClickHouse/pull/39272) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix an incorrect query for fetching PostgreSQL tables in the PostgreSQL database engine. Closes [#33502](https://github.com/ClickHouse/ClickHouse/issues/33502). [#39283](https://github.com/ClickHouse/ClickHouse/pull/39283) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix possible UB in MergeTreeBackgroundExecutor (leads to SIGSEGV on race with DROP/DETACH). [#39342](https://github.com/ClickHouse/ClickHouse/pull/39342) ([Azat Khuzhin](https://github.com/azat)).
* Avoid possible abort() in CapnProto on exception destruction. Closes [#30706](https://github.com/ClickHouse/ClickHouse/issues/30706). [#39365](https://github.com/ClickHouse/ClickHouse/pull/39365) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix behaviour of dictHas for direct dictionaries when multiple lookups to the same key are made in a single action. [#39385](https://github.com/ClickHouse/ClickHouse/pull/39385) ([James Morrison](https://github.com/jawm)).
* Fix crash which may happen while reading from dictionary with `DateTime64` attribute. Fixes [#38930](https://github.com/ClickHouse/ClickHouse/issues/38930). [#39391](https://github.com/ClickHouse/ClickHouse/pull/39391) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix `WriteBuffer` finalize in the destructor when cancelling a query, which could lead to a stuck query or even termination. Closes [#38199](https://github.com/ClickHouse/ClickHouse/issues/38199). [#39396](https://github.com/ClickHouse/ClickHouse/pull/39396) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix UB (stack-use-after-scope) in extractAll(). [#39397](https://github.com/ClickHouse/ClickHouse/pull/39397) ([Azat Khuzhin](https://github.com/azat)).
* Fix incorrect query result when trivial count optimization is in effect with array join. This fixes [#39431](https://github.com/ClickHouse/ClickHouse/issues/39431). [#39444](https://github.com/ClickHouse/ClickHouse/pull/39444) ([Amos Bird](https://github.com/amosbird)).
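The constant-map case that previously crashed `mapUpdate`/`mapFilter`, as a quick check:

```sql
SELECT
    mapUpdate(map('k1', 1, 'k2', 2), map('k1', 10)) AS updated,    -- {'k1':10,'k2':2}
    mapFilter((k, v) -> v > 1, map('k1', 1, 'k2', 2)) AS filtered; -- {'k2':2}
```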
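A minimal check of `rankCorr` with a NaN among the arguments (synthetic data):

```sql
SELECT rankCorr(x, y)
FROM
(
    SELECT number AS x, if(number = 3, nan, number * 2) AS y
    FROM numbers(10)
);
```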
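Demonstrating that `tuple()` now keeps the `LowCardinality` type (the table is hypothetical):

```sql
CREATE TABLE t_lc (s LowCardinality(String)) ENGINE = Memory;
INSERT INTO t_lc VALUES ('a');
SELECT toTypeName(tuple(s)) FROM t_lc; -- Tuple(LowCardinality(String)); LowCardinality is no longer dropped
```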
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Disable send_logs_level for INSERT into Distributed to avoid a possible hang. [#35075](https://github.com/ClickHouse/ClickHouse/pull/35075) ([Azat Khuzhin](https://github.com/azat)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Add a setting to use more memory for zstd decompression"'. [#38194](https://github.com/ClickHouse/ClickHouse/pull/38194) ([alesapin](https://github.com/alesapin)).
* NO CL ENTRY: 'Revert "Revert "Add a setting to use more memory for zstd decompression""'. [#38196](https://github.com/ClickHouse/ClickHouse/pull/38196) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "ClickHouse's boringssl module updated to the official version of the FIPS compliant."'. [#38201](https://github.com/ClickHouse/ClickHouse/pull/38201) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Fix optimization in PartialSortingTransform (SIGSEGV and possible incorrect result)"'. [#38361](https://github.com/ClickHouse/ClickHouse/pull/38361) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Add support for io_uring read method"'. [#38377](https://github.com/ClickHouse/ClickHouse/pull/38377) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Revert "Fix optimization in PartialSortingTransform (SIGSEGV and possible incorrect result)""'. [#38449](https://github.com/ClickHouse/ClickHouse/pull/38449) ([Maksim Kita](https://github.com/kitaisreal)).
* NO CL ENTRY: 'Don't spoil return code of integration tests runner with redundant tee'. [#38548](https://github.com/ClickHouse/ClickHouse/pull/38548) ([Vladimir Chebotarev](https://github.com/excitoon)).
* NO CL ENTRY: 'Revert "Non Negative Derivative window function"'. [#38551](https://github.com/ClickHouse/ClickHouse/pull/38551) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Upload to S3 compressed self-extracting clickhouse"'. [#38788](https://github.com/ClickHouse/ClickHouse/pull/38788) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* NO CL ENTRY: 'Revert "Smallish updates of dev guide"'. [#38848](https://github.com/ClickHouse/ClickHouse/pull/38848) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Fix toHour() monotonicity which can lead to incorrect query result (incorrect index analysis)"'. [#39001](https://github.com/ClickHouse/ClickHouse/pull/39001) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Fix WriteBuffer finalize in destructor when cacnel query"'. [#39433](https://github.com/ClickHouse/ClickHouse/pull/39433) ([Kruglov Pavel](https://github.com/Avogar)).
* NO CL ENTRY: 'Revert "[RFC] Fix LSan by fixing getauxval()"'. [#39434](https://github.com/ClickHouse/ClickHouse/pull/39434) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Remove broken optimisation in Direct dictionary dictHas implementation"'. [#39461](https://github.com/ClickHouse/ClickHouse/pull/39461) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Fix trivial count optimization with array join"'. [#39466](https://github.com/ClickHouse/ClickHouse/pull/39466) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Separate data storage abstraction for MergeTree [#36555](https://github.com/ClickHouse/ClickHouse/pull/36555) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Randomize settings related to in-order read/aggregation [#36914](https://github.com/ClickHouse/ClickHouse/pull/36914) ([Azat Khuzhin](https://github.com/azat)).
* Merge tree reader support for multiple read/filter steps: row level filter, prewhere, ... [#37165](https://github.com/ClickHouse/ClickHouse/pull/37165) ([Alexander Gololobov](https://github.com/davenger)).
* Backup Improvements 6 [#37358](https://github.com/ClickHouse/ClickHouse/pull/37358) ([Vitaly Baranov](https://github.com/vitlibar)).
* Move `updateInputStream` to `ITransformingStep` [#37393](https://github.com/ClickHouse/ClickHouse/pull/37393) ([Nikita Taranov](https://github.com/nickitat)).
* Proper wait of the clickhouse-server in tests [#37560](https://github.com/ClickHouse/ClickHouse/pull/37560) ([Azat Khuzhin](https://github.com/azat)).
* Upgrade curl to 7.83.1 [#37795](https://github.com/ClickHouse/ClickHouse/pull/37795) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
* Try fix flaky tests with transactions [#37822](https://github.com/ClickHouse/ClickHouse/pull/37822) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Send perf tests results to ci database [#37841](https://github.com/ClickHouse/ClickHouse/pull/37841) ([Vladimir C](https://github.com/vdimir)).
* Remove duplicate peak mem log [#37860](https://github.com/ClickHouse/ClickHouse/pull/37860) ([Amos Bird](https://github.com/amosbird)).
* tests: fix log_comment (extra quotes) [#37932](https://github.com/ClickHouse/ClickHouse/pull/37932) ([Azat Khuzhin](https://github.com/azat)).
* Throw exception when xml user profile does not exist [#38024](https://github.com/ClickHouse/ClickHouse/pull/38024) ([nvartolomei](https://github.com/nvartolomei)).
* Add `SYNC` command to internal ZooKeeper client [#38047](https://github.com/ClickHouse/ClickHouse/pull/38047) ([Antonio Andelic](https://github.com/antonio2368)).
* Better support of GCP storage [#38069](https://github.com/ClickHouse/ClickHouse/pull/38069) ([Anton Popov](https://github.com/CurtizJ)).
* Build artifacts upload [#38086](https://github.com/ClickHouse/ClickHouse/pull/38086) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Extract some diff from pr [#36171](https://github.com/ClickHouse/ClickHouse/issues/36171) [#38088](https://github.com/ClickHouse/ClickHouse/pull/38088) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Aggregate functions added restrict into batch methods [#38090](https://github.com/ClickHouse/ClickHouse/pull/38090) ([Maksim Kita](https://github.com/kitaisreal)).
* Add perf checkers to all Jepsen tests [#38091](https://github.com/ClickHouse/ClickHouse/pull/38091) ([Antonio Andelic](https://github.com/antonio2368)).
* Some fixes for tests with tsan [#38106](https://github.com/ClickHouse/ClickHouse/pull/38106) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Bring back [#36396](https://github.com/ClickHouse/ClickHouse/issues/36396) [#38110](https://github.com/ClickHouse/ClickHouse/pull/38110) ([Nikita Taranov](https://github.com/nickitat)).
* More suppressions for backward compatibility check [#38131](https://github.com/ClickHouse/ClickHouse/pull/38131) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Cherry pick [#38137](https://github.com/ClickHouse/ClickHouse/pull/38137) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Disable parameters for non direct executable user defined functions [#38142](https://github.com/ClickHouse/ClickHouse/pull/38142) ([Maksim Kita](https://github.com/kitaisreal)).
* SortDescription compile fix typo [#38144](https://github.com/ClickHouse/ClickHouse/pull/38144) ([Maksim Kita](https://github.com/kitaisreal)).
* Update version after release [#38147](https://github.com/ClickHouse/ClickHouse/pull/38147) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* New changelog and versions updated [#38148](https://github.com/ClickHouse/ClickHouse/pull/38148) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Some fixes for clickhouse-disks [#38150](https://github.com/ClickHouse/ClickHouse/pull/38150) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Remove processor description from span attributes - it is not working [#38157](https://github.com/ClickHouse/ClickHouse/pull/38157) ([Ilya Yatsishin](https://github.com/qoega)).
* Bump minimum / maximum LLVM to 12 / 14 [#38170](https://github.com/ClickHouse/ClickHouse/pull/38170) ([Robert Schulze](https://github.com/rschu1ze)).
* Disk transaction [#38182](https://github.com/ClickHouse/ClickHouse/pull/38182) ([alesapin](https://github.com/alesapin)).
* Check row size to avoid out of bounds access in PostgreSQLSource [#38190](https://github.com/ClickHouse/ClickHouse/pull/38190) ([Alexander Gololobov](https://github.com/davenger)).
* tests: add no-backward-compatibility-check for 02067_lost_part_s3 [#38195](https://github.com/ClickHouse/ClickHouse/pull/38195) ([Azat Khuzhin](https://github.com/azat)).
* tests/stress: fix TSan detection (enables thread fuzzer for non-TSan builds) [#38207](https://github.com/ClickHouse/ClickHouse/pull/38207) ([Azat Khuzhin](https://github.com/azat)).
* tests: disable 01646_system_restart_replicas_smoke under stress tests [#38212](https://github.com/ClickHouse/ClickHouse/pull/38212) ([Azat Khuzhin](https://github.com/azat)).
* tests/stress: fix TSan detection [#38213](https://github.com/ClickHouse/ClickHouse/pull/38213) ([Azat Khuzhin](https://github.com/azat)).
* buffer's getFileSize small changes [#38227](https://github.com/ClickHouse/ClickHouse/pull/38227) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix test for system table count in diag tool [#38236](https://github.com/ClickHouse/ClickHouse/pull/38236) ([Dale McDiarmid](https://github.com/gingerwizard)).
* Update version_date.tsv after v22.3.7.28-lts [#38237](https://github.com/ClickHouse/ClickHouse/pull/38237) ([github-actions[bot]](https://github.com/apps/github-actions)).
* Changelog attrs [#38238](https://github.com/ClickHouse/ClickHouse/pull/38238) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix reading from s3 in some corner cases [#38239](https://github.com/ClickHouse/ClickHouse/pull/38239) ([Anton Popov](https://github.com/CurtizJ)).
* use utility methods to access x509 struct fields. [#38251](https://github.com/ClickHouse/ClickHouse/pull/38251) ([larryluogit](https://github.com/larryluogit)).
* Don't try to kill empty list of containers in `integration/runner` II [#38269](https://github.com/ClickHouse/ClickHouse/pull/38269) ([Vladimir Chebotarev](https://github.com/excitoon)).
* Improve runners AMI and init scripts [#38273](https://github.com/ClickHouse/ClickHouse/pull/38273) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update thrift to 0.16.0 [#38280](https://github.com/ClickHouse/ClickHouse/pull/38280) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
* Extract some diff from [#36171](https://github.com/ClickHouse/ClickHouse/issues/36171) [#38285](https://github.com/ClickHouse/ClickHouse/pull/38285) ([Kseniia Sumarokova](https://github.com/kssenii)).
* fix trace-viz zoom anomalies [#38287](https://github.com/ClickHouse/ClickHouse/pull/38287) ([Sergei Trifonov](https://github.com/serxa)).
* Integration tests volume [#38291](https://github.com/ClickHouse/ClickHouse/pull/38291) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* fix typo in view.md [#38292](https://github.com/ClickHouse/ClickHouse/pull/38292) ([Anton Petrov](https://github.com/gsenseless)).
* Backup improvements 7 [#38299](https://github.com/ClickHouse/ClickHouse/pull/38299) ([Vitaly Baranov](https://github.com/vitlibar)).
* Document why the submodule check does not halt the configuration [#38304](https://github.com/ClickHouse/ClickHouse/pull/38304) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix misleading error message while s3 schema inference [#38306](https://github.com/ClickHouse/ClickHouse/pull/38306) ([Kruglov Pavel](https://github.com/Avogar)).
* Update README.md [#38313](https://github.com/ClickHouse/ClickHouse/pull/38313) ([Yuko Takagi](https://github.com/yukotakagi)).
* Ban projections for zero-copy replication in a right way [#38322](https://github.com/ClickHouse/ClickHouse/pull/38322) ([alesapin](https://github.com/alesapin)).
* Checkout full repositories for performance tests [#38327](https://github.com/ClickHouse/ClickHouse/pull/38327) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fixed comments [#38331](https://github.com/ClickHouse/ClickHouse/pull/38331) ([Vladimir Chebotarev](https://github.com/excitoon)).
* Try to fix 02305_schema_inference_with_globs [#38337](https://github.com/ClickHouse/ClickHouse/pull/38337) ([Kruglov Pavel](https://github.com/Avogar)).
* Extend ZooKeeper list request with support for filtering persistent or ephemeral nodes only [#38338](https://github.com/ClickHouse/ClickHouse/pull/38338) ([Antonio Andelic](https://github.com/antonio2368)).
* Upload logs for getting all tests command [#38343](https://github.com/ClickHouse/ClickHouse/pull/38343) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Followup test fix for ban projections [#38351](https://github.com/ClickHouse/ClickHouse/pull/38351) ([alesapin](https://github.com/alesapin)).
* Added --recursive to clickhouse-disks list [#38354](https://github.com/ClickHouse/ClickHouse/pull/38354) ([Alexander Gololobov](https://github.com/davenger)).
* Adding TLS V13 Test [#38355](https://github.com/ClickHouse/ClickHouse/pull/38355) ([larryluogit](https://github.com/larryluogit)).
* Better exception messages on wrong table engines/functions argument types [#38362](https://github.com/ClickHouse/ClickHouse/pull/38362) ([Kruglov Pavel](https://github.com/Avogar)).
* Better error message for failed odbc query [#38364](https://github.com/ClickHouse/ClickHouse/pull/38364) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Simplify parts commit methods [#38380](https://github.com/ClickHouse/ClickHouse/pull/38380) ([alesapin](https://github.com/alesapin)).
* Update docker-compose to try get rid of v1 errors [#38394](https://github.com/ClickHouse/ClickHouse/pull/38394) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Correct submodule after "base-x" commit [#38414](https://github.com/ClickHouse/ClickHouse/pull/38414) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Better hardware benchmark [#38419](https://github.com/ClickHouse/ClickHouse/pull/38419) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Refactoring to enable multi-match functions with non-const needles [#38434](https://github.com/ClickHouse/ClickHouse/pull/38434) ([Robert Schulze](https://github.com/rschu1ze)).
* more consistent work with paths in object storages [#38436](https://github.com/ClickHouse/ClickHouse/pull/38436) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Distinct sorted: calculate column positions once [#38438](https://github.com/ClickHouse/ClickHouse/pull/38438) ([Igor Nikonov](https://github.com/devcrafter)).
* Small improvement of the error message to hint at possible issue [#38458](https://github.com/ClickHouse/ClickHouse/pull/38458) ([Miel Donkers](https://github.com/mdonkers)).
* Fix comment [#38465](https://github.com/ClickHouse/ClickHouse/pull/38465) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Follow up for [#38436](https://github.com/ClickHouse/ClickHouse/issues/38436) [#38466](https://github.com/ClickHouse/ClickHouse/pull/38466) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add 22.7 release webinar. [#38481](https://github.com/ClickHouse/ClickHouse/pull/38481) ([Yuko Takagi](https://github.com/yukotakagi)).
* Add some TSA annotations [#38487](https://github.com/ClickHouse/ClickHouse/pull/38487) ([Alexander Tokmakov](https://github.com/tavplubix)).
* tests: cleanup tmp data in 02335_column_ttl_expired_column_optimization [#38488](https://github.com/ClickHouse/ClickHouse/pull/38488) ([Azat Khuzhin](https://github.com/azat)).
* Cleanup: local clang-tidy warnings found during review [#38489](https://github.com/ClickHouse/ClickHouse/pull/38489) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix some clang-tidy warnings in headers [#38491](https://github.com/ClickHouse/ClickHouse/pull/38491) ([Robert Schulze](https://github.com/rschu1ze)).
* A tiny improvement in report logging [#38507](https://github.com/ClickHouse/ClickHouse/pull/38507) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* tests: fix 02305_schema_inference_with_globs flakiness [#38511](https://github.com/ClickHouse/ClickHouse/pull/38511) ([Azat Khuzhin](https://github.com/azat)).
* Try to fix flaky test [#38516](https://github.com/ClickHouse/ClickHouse/pull/38516) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix `_csv.Error: field larger than field limit` [#38518](https://github.com/ClickHouse/ClickHouse/pull/38518) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix application errors grep in stress test [#38520](https://github.com/ClickHouse/ClickHouse/pull/38520) ([Kruglov Pavel](https://github.com/Avogar)).
* Use of disk batch operations in MergeTree [#38531](https://github.com/ClickHouse/ClickHouse/pull/38531) ([alesapin](https://github.com/alesapin)).
* Backup Improvements 8 [#38537](https://github.com/ClickHouse/ClickHouse/pull/38537) ([Vitaly Baranov](https://github.com/vitlibar)).
* Update poco [#38540](https://github.com/ClickHouse/ClickHouse/pull/38540) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Don't use std::unique_lock unless we need to [#38542](https://github.com/ClickHouse/ClickHouse/pull/38542) ([Robert Schulze](https://github.com/rschu1ze)).
* Rename slightly weirdly named "BuilderBinTidy" to "BuilderBinClangTidy" [#38546](https://github.com/ClickHouse/ClickHouse/pull/38546) ([Robert Schulze](https://github.com/rschu1ze)).
* Don't rollback SessionID request in Keeper [#38556](https://github.com/ClickHouse/ClickHouse/pull/38556) ([Antonio Andelic](https://github.com/antonio2368)).
* Add logging in Epoll and TimerDescriptor in case of EINTR [#38559](https://github.com/ClickHouse/ClickHouse/pull/38559) ([Kruglov Pavel](https://github.com/Avogar)).
* SQL create drop index minor fixes [#38561](https://github.com/ClickHouse/ClickHouse/pull/38561) ([Maksim Kita](https://github.com/kitaisreal)).
* Update version_date.tsv and changelogs after v22.6.2.12-stable [#38563](https://github.com/ClickHouse/ClickHouse/pull/38563) ([github-actions[bot]](https://github.com/apps/github-actions)).
* Allow Ordinary database in Stress Tests [#38568](https://github.com/ClickHouse/ClickHouse/pull/38568) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Make postgres integration test great again [#38582](https://github.com/ClickHouse/ClickHouse/pull/38582) ([Ilya Yatsishin](https://github.com/qoega)).
* Add check for empty processors in AggregatingTransform::expandPipeline [#38584](https://github.com/ClickHouse/ClickHouse/pull/38584) ([filimonov](https://github.com/filimonov)).
* quick fix for 02112_with_fill_interval [#38587](https://github.com/ClickHouse/ClickHouse/pull/38587) ([Nikita Taranov](https://github.com/nickitat)).
* Remove zlib in mariadb-connector-c [#38599](https://github.com/ClickHouse/ClickHouse/pull/38599) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
* Dictionaries added TSA annotations [#38601](https://github.com/ClickHouse/ClickHouse/pull/38601) ([Maksim Kita](https://github.com/kitaisreal)).
* CacheDictionary simplify update queue [#38602](https://github.com/ClickHouse/ClickHouse/pull/38602) ([Maksim Kita](https://github.com/kitaisreal)).
* Add separate option to omit symbols from heavy contrib [#38617](https://github.com/ClickHouse/ClickHouse/pull/38617) ([Azat Khuzhin](https://github.com/azat)).
* Fix exception messages in clickhouse su [#38619](https://github.com/ClickHouse/ClickHouse/pull/38619) ([filimonov](https://github.com/filimonov)).
* Added Greenplum benchmark [#38622](https://github.com/ClickHouse/ClickHouse/pull/38622) ([Dmitry Pavlov](https://github.com/kapustor)).
* Fix typo [#38623](https://github.com/ClickHouse/ClickHouse/pull/38623) ([tiegen](https://github.com/loyispa)).
* Better diagnostics in ReplicatedMergeTreeQueue [#38641](https://github.com/ClickHouse/ClickHouse/pull/38641) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Clean out randomized integration volumes each run [#38644](https://github.com/ClickHouse/ClickHouse/pull/38644) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update README.md [#38651](https://github.com/ClickHouse/ClickHouse/pull/38651) ([Yuko Takagi](https://github.com/yukotakagi)).
* Better naming for stuff related to split debug symbols [#38654](https://github.com/ClickHouse/ClickHouse/pull/38654) ([Robert Schulze](https://github.com/rschu1ze)).
* Add test for keeper `mntr` command [#38656](https://github.com/ClickHouse/ClickHouse/pull/38656) ([alesapin](https://github.com/alesapin)).
* Update hardware benchmark script [#38672](https://github.com/ClickHouse/ClickHouse/pull/38672) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
* Fix strange backport titles issues [#38679](https://github.com/ClickHouse/ClickHouse/pull/38679) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Follow-up to [#38568](https://github.com/ClickHouse/ClickHouse/issues/38568) [#38680](https://github.com/ClickHouse/ClickHouse/pull/38680) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix possible deadlocks with `MergeTreeData::Transaction` [#38702](https://github.com/ClickHouse/ClickHouse/pull/38702) ([alesapin](https://github.com/alesapin)).
* Fix backports diff [#38703](https://github.com/ClickHouse/ClickHouse/pull/38703) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix FillingTransform [#38705](https://github.com/ClickHouse/ClickHouse/pull/38705) ([Nikita Taranov](https://github.com/nickitat)).
* Try to improve backward compatibility check [#38717](https://github.com/ClickHouse/ClickHouse/pull/38717) ([Kruglov Pavel](https://github.com/Avogar)).
* SQL create drop index fix formatting [#38720](https://github.com/ClickHouse/ClickHouse/pull/38720) ([Maksim Kita](https://github.com/kitaisreal)).
* Provide sort description for output stream in ReadFromMergeTree step [#38721](https://github.com/ClickHouse/ClickHouse/pull/38721) ([Igor Nikonov](https://github.com/devcrafter)).
* Add exp_internal for expect tests [#38728](https://github.com/ClickHouse/ClickHouse/pull/38728) ([Azat Khuzhin](https://github.com/azat)).
* Fix CLICKHOUSE_TMP in tests (fixes broken CI) [#38733](https://github.com/ClickHouse/ClickHouse/pull/38733) ([Azat Khuzhin](https://github.com/azat)).
* Add SimpleCheck [#38744](https://github.com/ClickHouse/ClickHouse/pull/38744) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Tiny tests cleanup [#38749](https://github.com/ClickHouse/ClickHouse/pull/38749) ([Azat Khuzhin](https://github.com/azat)).
* Fix replication after improper merge process [#38752](https://github.com/ClickHouse/ClickHouse/pull/38752) ([Raúl Marín](https://github.com/Algunenano)).
* tests: make aggregate_state_exception_memory_leak deterministic [#38754](https://github.com/ClickHouse/ClickHouse/pull/38754) ([Azat Khuzhin](https://github.com/azat)).
* Bump jemalloc to fix possible assertion [#38757](https://github.com/ClickHouse/ClickHouse/pull/38757) ([Azat Khuzhin](https://github.com/azat)).
* Reintroduce nonNegativeDerivative() [#38774](https://github.com/ClickHouse/ClickHouse/pull/38774) ([Andrey Zvonov](https://github.com/zvonand)).
* Temporarily disable 01710_projection_fetch_long in BC check [#38798](https://github.com/ClickHouse/ClickHouse/pull/38798) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Use native Map type for OpenTelemetry attributes [#38814](https://github.com/ClickHouse/ClickHouse/pull/38814) ([Ilya Yatsishin](https://github.com/qoega)).
* Add test for segfault in Map combinator [#38831](https://github.com/ClickHouse/ClickHouse/pull/38831) ([Kruglov Pavel](https://github.com/Avogar)).
* Update libprotobuf-mutator + fix build [#38834](https://github.com/ClickHouse/ClickHouse/pull/38834) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Get files changed in master since release is branched [#38836](https://github.com/ClickHouse/ClickHouse/pull/38836) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* update integration tests doc [#38837](https://github.com/ClickHouse/ClickHouse/pull/38837) ([Bharat Nallan](https://github.com/bharatnc)).
* Revert of revert of smallish devguide update [#38850](https://github.com/ClickHouse/ClickHouse/pull/38850) ([Robert Schulze](https://github.com/rschu1ze)).
* Do not override compiler if it had been already set [#38856](https://github.com/ClickHouse/ClickHouse/pull/38856) ([Azat Khuzhin](https://github.com/azat)).
* Move check for denied allocations [#38858](https://github.com/ClickHouse/ClickHouse/pull/38858) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Refactoring of code around object storages, added LocalObjectStorage (extracted this diff from PR [#36171](https://github.com/ClickHouse/ClickHouse/issues/36171)) [#38860](https://github.com/ClickHouse/ClickHouse/pull/38860) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backup Improvements 9 [#38861](https://github.com/ClickHouse/ClickHouse/pull/38861) ([Vitaly Baranov](https://github.com/vitlibar)).
* Simple cleanup: interpreters and parsers [#38876](https://github.com/ClickHouse/ClickHouse/pull/38876) ([Igor Nikonov](https://github.com/devcrafter)).
* Remove unnecessary log [#38892](https://github.com/ClickHouse/ClickHouse/pull/38892) ([Raúl Marín](https://github.com/Algunenano)).
* Update version_date.tsv and changelogs after v22.6.3.35-stable [#38894](https://github.com/ClickHouse/ClickHouse/pull/38894) ([github-actions[bot]](https://github.com/apps/github-actions)).
* Retry docker buildx commands with progressive sleep in between [#38898](https://github.com/ClickHouse/ClickHouse/pull/38898) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Avoid false positive red sanitizer asserts check in stress test [#38901](https://github.com/ClickHouse/ClickHouse/pull/38901) ([Kruglov Pavel](https://github.com/Avogar)).
* Interpreter cleanup: ContextPtr -> const ContextPtr & in parameters [#38902](https://github.com/ClickHouse/ClickHouse/pull/38902) ([Igor Nikonov](https://github.com/devcrafter)).
* Add a test for simdjson [#38933](https://github.com/ClickHouse/ClickHouse/pull/38933) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix a typo [#38938](https://github.com/ClickHouse/ClickHouse/pull/38938) ([Nikolay Degterinsky](https://github.com/evillique)).
* Avoid redundant join block transformation during planning. [#38943](https://github.com/ClickHouse/ClickHouse/pull/38943) ([Amos Bird](https://github.com/amosbird)).
* Rename NUMBER_OF_DIMENSIONS_MISMATHED const to NUMBER_OF_DIMENSIONS_MISMATCHED [#38947](https://github.com/ClickHouse/ClickHouse/pull/38947) ([Vladimir Galunshchikov](https://github.com/soyayaos)).
* More careful destructor in BackupImpl [#38949](https://github.com/ClickHouse/ClickHouse/pull/38949) ([Vitaly Baranov](https://github.com/vitlibar)).
* Avoid weird exception in Keeper [#38963](https://github.com/ClickHouse/ClickHouse/pull/38963) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Update version_date.tsv after v22.3.8.39-lts [#38969](https://github.com/ClickHouse/ClickHouse/pull/38969) ([github-actions[bot]](https://github.com/apps/github-actions)).
* Remove tag no-backward-compatibility-check for specific versions [#38971](https://github.com/ClickHouse/ClickHouse/pull/38971) ([Kruglov Pavel](https://github.com/Avogar)).
* add Hetzner benchmark [#38974](https://github.com/ClickHouse/ClickHouse/pull/38974) ([Tyler Hannan](https://github.com/tylerhannan)).
* Update version_date.tsv after v22.4.6.53-stable [#38975](https://github.com/ClickHouse/ClickHouse/pull/38975) ([github-actions[bot]](https://github.com/apps/github-actions)).
* Disable instrumentation of sanitizer death callback [#38977](https://github.com/ClickHouse/ClickHouse/pull/38977) ([Alexander Tokmakov](https://github.com/tavplubix)).
* add ryzen 9 5950 benchmark [#38979](https://github.com/ClickHouse/ClickHouse/pull/38979) ([Tyler Hannan](https://github.com/tylerhannan)).
* EXPLAIN AST rewrite: rename to optimize [#38980](https://github.com/ClickHouse/ClickHouse/pull/38980) ([Igor Nikonov](https://github.com/devcrafter)).
* add macbook pro core i7 2014 benchmark [#38981](https://github.com/ClickHouse/ClickHouse/pull/38981) ([Tyler Hannan](https://github.com/tylerhannan)).
* add Huawei TaiShan 920 Benchmark [#38982](https://github.com/ClickHouse/ClickHouse/pull/38982) ([Tyler Hannan](https://github.com/tylerhannan)).
* tests: unique ZooKeeper path for Replicated.*MergeTree tables [#38999](https://github.com/ClickHouse/ClickHouse/pull/38999) ([Azat Khuzhin](https://github.com/azat)).
* Try another suppression for [#38629](https://github.com/ClickHouse/ClickHouse/issues/38629) [#39009](https://github.com/ClickHouse/ClickHouse/pull/39009) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add docker_server.py running to backport and release CIs [#39011](https://github.com/ClickHouse/ClickHouse/pull/39011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix flaky `test_system_merges/test.py::test_mutation_simple` [#39013](https://github.com/ClickHouse/ClickHouse/pull/39013) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix assertion in full sorting merge join [#39014](https://github.com/ClickHouse/ClickHouse/pull/39014) ([Vladimir C](https://github.com/vdimir)).
* Fix flaky 00620_optimize_on_nonleader_replica_zookeeper [#39019](https://github.com/ClickHouse/ClickHouse/pull/39019) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Suppress [#38643](https://github.com/ClickHouse/ClickHouse/issues/38643) [#39024](https://github.com/ClickHouse/ClickHouse/pull/39024) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Update url.md [#39025](https://github.com/ClickHouse/ClickHouse/pull/39025) ([Ilya Yatsishin](https://github.com/qoega)).
* Fix 'Tried to lock part ... for removal second time' [#39036](https://github.com/ClickHouse/ClickHouse/pull/39036) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add more settings for randomization [#39039](https://github.com/ClickHouse/ClickHouse/pull/39039) ([Anton Popov](https://github.com/CurtizJ)).
* add ScaleFlux CSD3000 Benchmark [#39040](https://github.com/ClickHouse/ClickHouse/pull/39040) ([Tyler Hannan](https://github.com/tylerhannan)).
* BACKUP/RESTORE ON CLUSTER use async mode on replicas now. [#39046](https://github.com/ClickHouse/ClickHouse/pull/39046) ([Vitaly Baranov](https://github.com/vitlibar)).
* More stable `test_s3_zero_copy_ttl`, weakened requirement to move data to S3 in 0-5 seconds [#39064](https://github.com/ClickHouse/ClickHouse/pull/39064) ([Vladimir Chebotaryov](https://github.com/quickhouse)).
* Parameter --decompressor added to utils/self-extracting-executable/compressor [#39065](https://github.com/ClickHouse/ClickHouse/pull/39065) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Follow-up to [#39036](https://github.com/ClickHouse/ClickHouse/issues/39036) [#39091](https://github.com/ClickHouse/ClickHouse/pull/39091) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Update registerDiskS3.cpp [#39092](https://github.com/ClickHouse/ClickHouse/pull/39092) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix code in getLeastSupertype function [#39101](https://github.com/ClickHouse/ClickHouse/pull/39101) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove some debug logging [#39102](https://github.com/ClickHouse/ClickHouse/pull/39102) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Prefix overridden add_executable() command with "clickhouse_" [#39108](https://github.com/ClickHouse/ClickHouse/pull/39108) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix meilisearch tests [#39110](https://github.com/ClickHouse/ClickHouse/pull/39110) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Properly remove projection from part in case it was removed from table metadata. [#39119](https://github.com/ClickHouse/ClickHouse/pull/39119) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Update cluster.py [#39120](https://github.com/ClickHouse/ClickHouse/pull/39120) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Tiny updates for tests. [#39127](https://github.com/ClickHouse/ClickHouse/pull/39127) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix data race in CompletedPipelineExecutor. [#39132](https://github.com/ClickHouse/ClickHouse/pull/39132) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix style again [#39133](https://github.com/ClickHouse/ClickHouse/pull/39133) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix path retrieval for Keeper's state [#39148](https://github.com/ClickHouse/ClickHouse/pull/39148) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Slightly better interface of waitForMutation [#39154](https://github.com/ClickHouse/ClickHouse/pull/39154) ([Amos Bird](https://github.com/amosbird)).
* ThreadPool fixes [#39160](https://github.com/ClickHouse/ClickHouse/pull/39160) ([Azat Khuzhin](https://github.com/azat)).
* Add test for [#39132](https://github.com/ClickHouse/ClickHouse/issues/39132) [#39173](https://github.com/ClickHouse/ClickHouse/pull/39173) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Suppression for BC check (`Cannot parse string 'Hello' as UInt64`) [#39176](https://github.com/ClickHouse/ClickHouse/pull/39176) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix 01961_roaring_memory_tracking test [#39187](https://github.com/ClickHouse/ClickHouse/pull/39187) ([Dmitry Novik](https://github.com/novikd)).
* Cleanup: done during [#38719](https://github.com/ClickHouse/ClickHouse/issues/38719) (SortingStep: deduce way to sort based on … [#39191](https://github.com/ClickHouse/ClickHouse/pull/39191) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix exception in AsynchronousMetrics for s390x [#39193](https://github.com/ClickHouse/ClickHouse/pull/39193) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Optimize accesses to system.stack_trace (filter by name before sending signal) [#39212](https://github.com/ClickHouse/ClickHouse/pull/39212) ([Azat Khuzhin](https://github.com/azat)).
* Enable warning "-Wdeprecated-dynamic-exception-spec" [#39213](https://github.com/ClickHouse/ClickHouse/pull/39213) ([Robert Schulze](https://github.com/rschu1ze)).
* Remove specialization global lock/unlock from ActionLocksManager [#39215](https://github.com/ClickHouse/ClickHouse/pull/39215) ([Azat Khuzhin](https://github.com/azat)).
* Turn some warnings on [#39223](https://github.com/ClickHouse/ClickHouse/pull/39223) ([Robert Schulze](https://github.com/rschu1ze)).
* Pass const std::string_view by value, not by reference [#39224](https://github.com/ClickHouse/ClickHouse/pull/39224) ([Kruglov Pavel](https://github.com/Avogar)).
* Minor fix for BC check [#39231](https://github.com/ClickHouse/ClickHouse/pull/39231) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backport script [#39235](https://github.com/ClickHouse/ClickHouse/pull/39235) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Do not color logs on client if they are redirected to file [#39243](https://github.com/ClickHouse/ClickHouse/pull/39243) ([Anton Popov](https://github.com/CurtizJ)).
* Remove incorrect assertion [#39245](https://github.com/ClickHouse/ClickHouse/pull/39245) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add X86 prefix to x86 performance tests [#39251](https://github.com/ClickHouse/ClickHouse/pull/39251) ([Robert Schulze](https://github.com/rschu1ze)).
* Check that the destination for a backup is not in use. [#39254](https://github.com/ClickHouse/ClickHouse/pull/39254) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix stacktraces in gdb in BC check [#39256](https://github.com/ClickHouse/ClickHouse/pull/39256) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable flaky test `test_s3_zero_copy_on_hybrid_storage` [#39258](https://github.com/ClickHouse/ClickHouse/pull/39258) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Enabled Wc99-extensions + Wsign-conversion [#39261](https://github.com/ClickHouse/ClickHouse/pull/39261) ([Robert Schulze](https://github.com/rschu1ze)).
* Pass const StringRef by value, not by reference [#39262](https://github.com/ClickHouse/ClickHouse/pull/39262) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix assertion in transactions [#39263](https://github.com/ClickHouse/ClickHouse/pull/39263) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix macosx compilation due to endian.h [#39265](https://github.com/ClickHouse/ClickHouse/pull/39265) ([Jordi Villar](https://github.com/jrdi)).
* Another suppression for BC check [#39276](https://github.com/ClickHouse/ClickHouse/pull/39276) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix builder image for releases w/o diagnostics tool [#39281](https://github.com/ClickHouse/ClickHouse/pull/39281) ([Azat Khuzhin](https://github.com/azat)).
* [RFC] Remove superior atomic from MergeTreeBackgroundExecutor and annotations for TSA [#39285](https://github.com/ClickHouse/ClickHouse/pull/39285) ([Azat Khuzhin](https://github.com/azat)).
* Fix clang tidy [#39288](https://github.com/ClickHouse/ClickHouse/pull/39288) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix running cmake with predefined cache (for development only) [#39295](https://github.com/ClickHouse/ClickHouse/pull/39295) ([Azat Khuzhin](https://github.com/azat)).
* Fix googletest contrib compilation (due to GTEST_HAS_POSIX_RE=0) [#39298](https://github.com/ClickHouse/ClickHouse/pull/39298) ([Azat Khuzhin](https://github.com/azat)).
* First try at reducing the use of StringRef [#39300](https://github.com/ClickHouse/ClickHouse/pull/39300) ([Robert Schulze](https://github.com/rschu1ze)).
* Whitespaces [#39303](https://github.com/ClickHouse/ClickHouse/pull/39303) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add test for window function inside CASE [#39305](https://github.com/ClickHouse/ClickHouse/pull/39305) ([Dmitry Novik](https://github.com/novikd)).
* Simple Check should be updated on rerun [#39307](https://github.com/ClickHouse/ClickHouse/pull/39307) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix leaking of logger in clickhouse-disks [#39314](https://github.com/ClickHouse/ClickHouse/pull/39314) ([Azat Khuzhin](https://github.com/azat)).
* Update exception message [#39315](https://github.com/ClickHouse/ClickHouse/pull/39315) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix build clang-13 [#39318](https://github.com/ClickHouse/ClickHouse/pull/39318) ([alesapin](https://github.com/alesapin)).
* Auto set test name in integration tests [#39322](https://github.com/ClickHouse/ClickHouse/pull/39322) ([Vitaly Baranov](https://github.com/vitlibar)).
* Try fix flaky test_store_cleanup [#39334](https://github.com/ClickHouse/ClickHouse/pull/39334) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Do not start on unexpected Ordinary metadata [#39337](https://github.com/ClickHouse/ClickHouse/pull/39337) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Switch from mkdocs to Docusaurus [#39338](https://github.com/ClickHouse/ClickHouse/pull/39338) ([Dan Roscigno](https://github.com/DanRoscigno)).
* Fix flaky 01174_select_insert_isolation [#39339](https://github.com/ClickHouse/ClickHouse/pull/39339) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Better exception messages in schema inference [#39340](https://github.com/ClickHouse/ClickHouse/pull/39340) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix memory exceptions with transactions [#39341](https://github.com/ClickHouse/ClickHouse/pull/39341) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix typo [#39360](https://github.com/ClickHouse/ClickHouse/pull/39360) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix typo [#39361](https://github.com/ClickHouse/ClickHouse/pull/39361) ([Kruglov Pavel](https://github.com/Avogar)).
* Do not enqueue unneeded parts for check [#39366](https://github.com/ClickHouse/ClickHouse/pull/39366) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Avoid loading toolchain file multiple times to avoid confusing ccache [#39387](https://github.com/ClickHouse/ClickHouse/pull/39387) ([Azat Khuzhin](https://github.com/azat)).
* Fix make clean (due to crosscompile of llvm) [#39392](https://github.com/ClickHouse/ClickHouse/pull/39392) ([Azat Khuzhin](https://github.com/azat)).
* Disable real-time digest in Keeper by default [#39393](https://github.com/ClickHouse/ClickHouse/pull/39393) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix interactive client with older server [#39413](https://github.com/ClickHouse/ClickHouse/pull/39413) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix BC check [#39414](https://github.com/ClickHouse/ClickHouse/pull/39414) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix flaky test test_login_as_dropped_user_xml. [#39415](https://github.com/ClickHouse/ClickHouse/pull/39415) ([Vitaly Baranov](https://github.com/vitlibar)).
* Introduce a dependency to libuv when building NATS [#39427](https://github.com/ClickHouse/ClickHouse/pull/39427) ([ltrk2](https://github.com/ltrk2)).
* Set default value cross_to_inner_join_rewrite = 1 [#39443](https://github.com/ClickHouse/ClickHouse/pull/39443) ([Vladimir C](https://github.com/vdimir)).
* Respect table alias for additional_table_filters. [#39456](https://github.com/ClickHouse/ClickHouse/pull/39456) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### Performance Optimization and Bug Fix
* Enabled `pread_threadpool` read method by default. It will increase read performance. Bug fix: if direct IO is enabled and the number of threads is large and `pread_threadpool` is used, it may cause a logical error. [#33653](https://github.com/ClickHouse/ClickHouse/pull/33653) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
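As an illustration, the read method can also be pinned per session; a minimal sketch, assuming `local_filesystem_read_method` is the setting governing this behavior and `hits` is a hypothetical local MergeTree table:

``` sql
-- Assumed setting name; 'pread_threadpool' is described above as the new default.
SET local_filesystem_read_method = 'pread_threadpool';

-- Reads for this query would then go through the reader thread pool.
SELECT count() FROM hits;
```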

View File

@ -119,16 +119,9 @@ On CentOS, RedHat run `sudo yum install cmake ninja-build`.
If you use Arch or Gentoo, you probably know how to install CMake yourself.
For installing CMake and Ninja on Mac OS X, first install Homebrew and then install everything else via brew:
``` bash
/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
brew install cmake ninja
```
Next, check the version of CMake: `cmake --version`. If it is below 3.12, you should install a newer version from the website: https://cmake.org/download/.
## C++ Compiler {#c-compiler}
Compilers Clang starting from version 11 is supported for building ClickHouse.
The Clang compiler, starting from version 12, is supported for building ClickHouse.
Clang should be used instead of gcc. However, our continuous integration (CI) platform runs checks for about a dozen build combinations.
@ -138,9 +131,6 @@ On Ubuntu/Debian you can use the automatic installation script (check [official
``` bash
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
```
Mac OS X build is also supported. Just run `brew install llvm`.
## The Building Process {#the-building-process}
Now that you are ready to build ClickHouse, we recommend you create a separate directory `build` inside `ClickHouse` that will contain all of the build artefacts:

View File

@ -692,9 +692,7 @@ auto s = std::string{"Hello"};
**1.** Virtual inheritance is not used.
**2.** Exception specifiers from C++03 are not used.
**3.** Constructs which have convenient syntactic sugar in modern C++, e.g.
**2.** Constructs which have convenient syntactic sugar in modern C++, e.g.
```
// Traditional way without syntactic sugar
@ -745,7 +743,7 @@ But other things being equal, cross-platform or portable code is preferred.
**2.** Language: C++20 (see the list of available [C++20 features](https://en.cppreference.com/w/cpp/compiler_support#C.2B.2B20_features)).
**3.** Compiler: `clang`. At this time (April 2021), the code is compiled using clang version 11. (It can also be compiled using `gcc` version 10, but it's untested and not suitable for production usage).
**3.** Compiler: `clang`. At the time of writing (July 2022), the code is compiled using clang version >= 12. (It can also be compiled using `gcc`, but it's untested and not suitable for production usage).
The standard library is used (`libc++`).
@ -755,7 +753,7 @@ The standard library is used (`libc++`).
The CPU instruction set is the minimum supported set among our servers. Currently, it is SSE 4.2.
**6.** Use `-Wall -Wextra -Werror` compilation flags. Also `-Weverything` is used with few exceptions.
**6.** Use `-Wall -Wextra -Werror -Weverything` compilation flags with a few exceptions.
**7.** Use static linking with all libraries except those that are difficult to connect to statically (see the output of the `ldd` command).

View File

@ -81,11 +81,11 @@ $ ./src/unit_tests_dbms --gtest_filter=LocalAddress*
## Performance Tests {#performance-tests}
Performance tests allow to measure and compare performance of some isolated part of ClickHouse on synthetic queries. Tests are located at `tests/performance`. Each test is represented by `.xml` file with description of test case. Tests are run with `docker/test/performance-comparison` tool . See the readme file for invocation.
Performance tests allow measuring and comparing the performance of some isolated part of ClickHouse on synthetic queries. Performance tests are located at `tests/performance/`. Each test is represented by an `.xml` file with a description of the test case. Tests are run with the `docker/test/performance-comparison` tool. See the readme file for invocation.
Each test runs one or multiple queries (possibly with combinations of parameters) in a loop.
If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. It always makes sense to use `perf top` or other `perf` tools during your tests.
If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. Also, it is recommended to write performance tests when you add or modify SQL functions which are relatively isolated and not too obscure. It always makes sense to use `perf top` or other `perf` tools during your tests.
## Test Tools and Scripts {#test-tools-and-scripts}

View File

@ -482,9 +482,9 @@ For example:
## Projections {#projections}
Projections are like [materialized views](../../../sql-reference/statements/create/view.md#materialized) but defined at the part level. They provide consistency guarantees along with automatic usage in queries.
Projections are an experimental feature. To enable them you must set the [allow_experimental_projection_optimization](../../../operations/settings/settings.md#allow-experimental-projection-optimization) to `1`. See also the [force_optimize_projection](../../../operations/settings/settings.md#force-optimize-projection) setting.
::: note
When you are implementing projections you should also consider the [force_optimize_projection](../../../operations/settings/settings.md#force-optimize-projection) setting.
:::
Projections are not supported in `SELECT` statements with the [FINAL](../../../sql-reference/statements/select/from.md#select-from-final) modifier.
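As a rough sketch of how a projection is declared and used (the table `visits` and the column `user_id` are hypothetical, not taken from the text above):

``` sql
SET allow_experimental_projection_optimization = 1;

-- Declare and materialize a part-level aggregate projection.
ALTER TABLE visits ADD PROJECTION user_totals
(
    SELECT user_id, count() GROUP BY user_id
);
ALTER TABLE visits MATERIALIZE PROJECTION user_totals;

-- Matching aggregations can now be answered from the projection automatically.
SELECT user_id, count() FROM visits GROUP BY user_id;
```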
### Projection Query {#projection-query}

View File

@ -194,18 +194,25 @@ Differs from the `TabSeparated` format in that the column names are written in t
During parsing, the first row is expected to contain the column names. You can use column names to determine their position and to check their correctness.
:::warning
If the setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
This format is also available under the name `TSVWithNames`.
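A minimal sketch of how these two settings interact on input (the table `t` and its columns are assumptions for illustration):

``` sql
SET input_format_with_names_use_header = 1,
    input_format_skip_unknown_fields = 1;

-- Header columns are matched to table columns by name, so `b` may precede `a`;
-- the unknown column `extra` is skipped instead of causing an error.
INSERT INTO t FORMAT TSVWithNames
b	extra	a
1	x	2
```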
## TabSeparatedWithNamesAndTypes {#tabseparatedwithnamesandtypes}
Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row.
The first row with names is processed the same way as in `TabSeparatedWithNames` format.
:::warning
If the setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If the setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from the input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
This format is also available under the name `TSVWithNamesAndTypes`.
@ -451,10 +458,24 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
If the setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## CSVWithNamesAndTypes {#csvwithnamesandtypes}
Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
If the setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If the setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from the input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
## CustomSeparated {#format-customseparated}
Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses the escaping rule from the [format_custom_escaping_rule](../operations/settings/settings.md#format_custom_escaping_rule) setting and the delimiters from the [format_custom_field_delimiter](../operations/settings/settings.md#format_custom_field_delimiter), [format_custom_row_before_delimiter](../operations/settings/settings.md#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](../operations/settings/settings.md#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](../operations/settings/settings.md#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](../operations/settings/settings.md#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](../operations/settings/settings.md#format_custom_result_after_delimiter) settings, rather than from format strings.
@ -465,10 +486,24 @@ There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [Templat
Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
If the setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## CustomSeparatedWithNamesAndTypes {#customseparatedwithnamesandtypes}
Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
If the setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If the setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from the input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
## SQLInsert {#sqlinsert}
Outputs data as a sequence of `INSERT INTO table (columns...) VALUES (...), (...) ...;` statements.
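For example (a sketch; the default target table name and the exact spacing of the generated statements may differ):

``` sql
SELECT number AS x, toString(number) AS s
FROM numbers(3)
FORMAT SQLInsert

-- Produces something like:
-- INSERT INTO table (x, s) VALUES (0, '0'), (1, '1'), (2, '2');
```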
@ -911,18 +946,46 @@ Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yie
Differs from `JSONCompactEachRow` format in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
If the setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## JSONCompactEachRowWithNamesAndTypes {#jsoncompacteachrowwithnamesandtypes}
Differs from `JSONCompactEachRow` format in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
If the setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If the setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from the input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
## JSONCompactStringsEachRowWithNames {#jsoncompactstringseachrowwithnames}
Differs from `JSONCompactStringsEachRow` in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
If the setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## JSONCompactStringsEachRowWithNamesAndTypes {#jsoncompactstringseachrowwithnamesandtypes}
Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
If the setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If the setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from the input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
```json
["num", "str", "arr"]
["Int32", "String", "Array(UInt8)"]
@ -1199,6 +1262,12 @@ Similar to [RowBinary](#rowbinary), but with added header:
- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N)
- N `String`s specifying column names
:::warning
If the setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes}
Similar to [RowBinary](#rowbinary), but with added header:
@ -1207,6 +1276,14 @@ Similar to [RowBinary](#rowbinary), but with added header:
- N `String`s specifying column names
- N `String`s specifying column types
:::warning
If the setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If the setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from the input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
## Values {#data-format-values}
Prints every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in a decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces aren't inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../sql-reference/syntax.md) is represented as `NULL`.
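A quick illustration (a sketch; real output spacing may differ slightly):

``` sql
SELECT number AS n, toString(number) AS s
FROM numbers(2)
FORMAT Values

-- (0,'0'),(1,'1')
```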

View File

@ -67,7 +67,7 @@ Features:
### Grafana {#grafana}
[Grafana](https://grafana.com/grafana/plugins/vertamedia-clickhouse-datasource) is a platform for monitoring and visualization.
[Grafana](https://grafana.com/grafana/plugins/grafana-clickhouse-datasource/) is a platform for monitoring and visualization.
"Grafana allows you to query, visualize, alert on and understand your metrics no matter where they are stored. Create, explore, and share dashboards with your team and foster a data driven culture. Trusted and loved by the community" &mdash; grafana.com.

View File

@ -45,7 +45,7 @@ Configuration template:
- `min_part_size` The minimum size of a data part.
- `min_part_size_ratio` The ratio of the data part size to the table size.
- `method` Compression method. Acceptable values: `lz4`, `lz4hc`, `zstd`.
- `method` Compression method. Acceptable values: `lz4`, `lz4hc`, `zstd`,`deflate_qpl`.
- `level` Compression level. See [Codecs](../../sql-reference/statements/create/table.md#create-query-general-purpose-codecs).
You can configure multiple `<case>` sections.

View File

@ -29,7 +29,7 @@ Structure of the `users` section:
<profile>profile_name</profile>
<quota>default</quota>
<default_database>default<default_database>
<default_database>default</default_database>
<databases>
<database_name>
<table_name>

View File

@ -302,18 +302,34 @@ Default value: `ALL`.
Specifies the [JOIN](../../sql-reference/statements/select/join.md) algorithm.
Several algorithms can be specified, and an available one is chosen for a particular query based on its kind/strictness and the table engine (a usage sketch follows the list below).
Possible values:
- `hash` — [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used.
- `partial_merge` — [Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) is used.
- `prefer_partial_merge` — ClickHouse always tries to use `merge` join if possible.
- `auto` — ClickHouse tries to change `hash` join to `merge` join on the fly to avoid out of memory.
- `default``hash` or `direct`, if possible (same as `direct,hash`)
Default value: `hash`.
- `hash` — [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
When using `hash` algorithm the right part of `JOIN` is uploaded into RAM.
- `parallel_hash` - a variation of `hash` join that splits the data into buckets and builds several hash tables concurrently instead of one, to speed up this process.
When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM.
- `partial_merge` — a variation of the [sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join), where only the right table is fully sorted.
The `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported).
When using the `partial_merge` algorithm, ClickHouse sorts the data and dumps it to the disk. The `partial_merge` algorithm in ClickHouse differs slightly from the classic implementation. First, ClickHouse sorts the right table by the join keys in blocks and creates a min-max index for the sorted blocks. Then it sorts parts of the left table by the `join key` and joins them over the right table. The min-max index is also used to skip unneeded right table blocks.
- `direct` - can be applied when the right storage supports key-value requests.
The `direct` algorithm performs a lookup in the right table using rows from the left table as keys. It's supported only by special storage such as [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary) or [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) and only the `LEFT` and `INNER` JOINs.
- `auto` — try `hash` join and switch on the fly to another algorithm if the memory limit is violated.
- `full_sorting_merge` — [Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) with full sorting of the joined tables before joining.
- `prefer_partial_merge` — ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`.
When using `partial_merge` algorithm ClickHouse sorts the data and dumps it to the disk. The `merge` algorithm in ClickHouse differs a bit from the classic realization. First ClickHouse sorts the right table by [join key](../../sql-reference/statements/select/join.md#select-join) in blocks and creates min-max index for sorted blocks. Then it sorts parts of left table by `join key` and joins them over right table. The min-max index is also used to skip unneeded right table blocks.
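A short usage sketch (the tables `t1` and `t2` are hypothetical):

``` sql
-- Try the in-memory hash join first and switch to another algorithm
-- on the fly if the memory limit is violated.
SET join_algorithm = 'auto';

SELECT t1.id, t2.value
FROM t1
INNER JOIN t2 ON t1.id = t2.id;
```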
## join_any_take_last_row {#settings-join_any_take_last_row}

View File

@ -5,9 +5,9 @@ sidebar_label: Sources of External Dictionaries
# Sources of External Dictionaries
An external dictionary can be connected from many different sources.
An external dictionary can be connected to ClickHouse from many different sources.
If dictionary is configured using xml-file, the configuration looks like this:
If the dictionary is configured using an xml-file, the configuration looks like this:
``` xml
<clickhouse>
@ -24,7 +24,7 @@ If dictionary is configured using xml-file, the configuration looks like this:
</clickhouse>
```
In case of [DDL-query](../../../sql-reference/statements/create/dictionary.md), equal configuration will looks like:
In case of [DDL-query](../../../sql-reference/statements/create/dictionary.md), the configuration described above will look like:
``` sql
CREATE DICTIONARY dict_name (...)
@ -96,7 +96,7 @@ Setting fields:
- `path` The absolute path to the file.
- `format` The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
When dictionary with source `FILE` is created via DDL command (`CREATE DICTIONARY ...`), the source file needs to be located in `user_files` directory, to prevent DB users accessing arbitrary file on ClickHouse node.
When a dictionary with source `FILE` is created via DDL command (`CREATE DICTIONARY ...`), the source file needs to be located in the `user_files` directory to prevent DB users from accessing arbitrary files on the ClickHouse node.
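A minimal sketch of such a DDL-created dictionary (the dictionary name, structure, and file path are illustrative assumptions):

``` sql
CREATE DICTIONARY products
(
    id UInt64,
    name String
)
PRIMARY KEY id
-- The file must live under the server's user_files directory.
SOURCE(FILE(path '/var/lib/clickhouse/user_files/products.tsv' format 'TabSeparated'))
LAYOUT(FLAT())
LIFETIME(300);
```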
**See Also**
@ -104,7 +104,7 @@ When dictionary with source `FILE` is created via DDL command (`CREATE DICTIONAR
## Executable File
Working with executable files depends on [how the dictionary is stored in memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable files STDIN. Otherwise, ClickHouse starts executable file and treats its output as dictionary data.
Working with executable files depends on [how the dictionary is stored in memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable files STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data.
Example of settings:
@ -120,22 +120,22 @@ Example of settings:
Setting fields:
- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `command` — The absolute path to the executable file, or the file name (if the command's directory is in the `PATH`).
- `format` — The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
- `command_termination_timeout` — executable script should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have `command_termination_timeout` seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `command_termination_timeout`The executable script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file will have `command_termination_timeout` seconds to shutdown before ClickHouse will send a SIGTERM signal to the child process. `command_termination_timeout` is specified in seconds. Default value is 10. Optional parameter.
- `command_read_timeout` - Timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - Timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `0`. Optional parameter.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched for inside the user_scripts folder specified by [user_scripts_path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using a whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as an argument for `bin/sh -c`. Default value is `0`. Optional parameter.
- `send_chunk_header` - Controls whether to send the row count before sending a chunk of data to the process. Optional. Default value is `false`.
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node.
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
## Executable Pool
Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](external-dicts-dict-layout.md#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, `complex_key_direct` layouts.
Executable pool allows loading data from a pool of processes. This source does not work with dictionary layouts that need to load all the data from the source. The executable pool works if the dictionary [is stored](external-dicts-dict-layout.md#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts.
Executable pool will spawn pool of processes with specified command and keep them running until they exit. The program should read data from STDIN while it is available and output result to STDOUT, and it can wait for next block of data on STDIN. ClickHouse will not close STDIN after processing a block of data but will pipe another chunk of data when needed. The executable script should be ready for this way of data processing — it should poll STDIN and flush data to STDOUT early.
Executable pool will spawn a pool of processes with the specified command and keep them running until they exit. The program should read data from STDIN while it is available and output the result to STDOUT. It can wait for the next block of data on STDIN. ClickHouse will not close STDIN after processing a block of data, but will pipe another chunk of data when needed. The executable script should be ready for this way of data processing — it should poll STDIN and flush data to STDOUT early.
Example of settings:
@ -555,7 +555,11 @@ Setting fields:
The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::
MySQL can be connected on a local host via sockets. To do this, set `host` and `socket`.
:::note
There is no explicit parameter `secure`. When an SSL connection is established, security is mandatory.
:::
MySQL can be connected to on a local host via sockets. To do this, set `host` and `socket`.
Example of settings:
@ -815,4 +819,4 @@ Setting fields:
:::note
The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::

View File

@ -836,7 +836,7 @@ Result:
## now
Returns the current date and time.
Returns the current date and time at the moment of query analysis. The function is a constant expression.
**Syntax**
@ -884,14 +884,20 @@ Result:
└──────────────────────┘
```
## nowInBlock
Returns the current date and time at the moment of processing of each block of data. In contrast to the function `now`, it is not a constant expression, and the returned value will be different in different blocks for long-running queries.
It makes sense to use this function to generate the current time in long-running INSERT SELECT queries.
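For example (a sketch; on a query this small both columns will usually show the same time, the difference appears on long-running, multi-block queries):

``` sql
SELECT
    now() AS constant_time,      -- evaluated once, at query analysis
    nowInBlock() AS block_time   -- re-evaluated for each processed block
FROM system.numbers
LIMIT 3;
```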
## today
Accepts zero arguments and returns the current date at one of the moments of request execution.
Accepts zero arguments and returns the current date at one of the moments of query analysis.
The same as toDate(now()).
## yesterday
Accepts zero arguments and returns yesterday's date at one of the moments of request execution.
Accepts zero arguments and returns yesterday's date at one of the moments of query analysis.
The same as today() - 1.
## timeSlot

View File

@ -494,22 +494,21 @@ If the s string is non-empty and does not contain the c character at
Returns the string s that was converted from the encoding in from to the encoding in to.
## Base58Encode(plaintext[, alphabet_name]), Base58Decode(encoded_text[, alphabet_name])
## Base58Encode(plaintext), Base58Decode(encoded_text)
Accepts a String and encodes/decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using specified alphabet.
Accepts a String and encodes/decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using "Bitcoin" alphabet.
**Syntax**
```sql
base58Encode(decoded[, alphabet_name])
base58Decode(encoded[, alphabet_name])
base58Encode(decoded)
base58Decode(encoded)
```
**Arguments**
- `decoded` — [String](../../sql-reference/data-types/string.md) column or constant.
- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid base58-encoded value, an exception is thrown.
- `alphabet_name` — String constant. Specifies alphabet used for encoding. Possible values: `gmp`, `bitcoin`, `ripple`, `flickr`. Default: `bitcoin`.
**Returned value**
@ -522,17 +521,17 @@ Type: [String](../../sql-reference/data-types/string.md).
Query:
``` sql
SELECT base58Encode('encode', 'flickr');
SELECT base58Decode('izCFiDUY', 'ripple');
SELECT base58Encode('Encoded');
SELECT base58Encode('3dc8KtHrwM');
```
Result:
```text
┌─base58Encode('encode', 'flickr')─┐
│ SvyTHb1D                         │
└──────────────────────────────────┘
┌─encodeBase58('Encoded')─┐
│ 3dc8KtHrwM              │
└─────────────────────────┘
┌─base58Decode('izCFiDUY', 'ripple')─┐
│ decode                             │
└────────────────────────────────────┘
┌─decodeBase58('3dc8KtHrwM')─┐
│ Encoded                    │
└────────────────────────────┘
```

View File

@ -96,7 +96,7 @@ For more information, see the link: [RE2](https://github.com/google/re2/blob/mas
## translate(s, from, to)
The function replaces characters in the string s in accordance with one-to-one character mapping defined by from and to strings. from and to must be ASCII strings of the same size. Non-ASCII characters in the original string are not modified.
The function replaces characters in the string s in accordance with one-to-one character mapping defined by from and to strings. from and to must be constant ASCII strings of the same size. Non-ASCII characters in the original string are not modified.
Example:
@ -112,7 +112,7 @@ SELECT translate('Hello, World!', 'delor', 'DELOR') AS res
## translateUTF8(string, from, to)
Similar to previous function, but works with UTF-8 arguments. from and to must be valid UTF-8 strings of the same size.
Similar to previous function, but works with UTF-8 arguments. from and to must be valid constant UTF-8 strings of the same size.
Example:

View File

@ -248,6 +248,15 @@ ClickHouse supports general purpose codecs and specialized codecs.
High compression levels are useful for asymmetric scenarios, like compress once, decompress repeatedly. Higher levels mean better compression and higher CPU usage.
#### DEFLATE_QPL
`DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply:
- DEFLATE_QPL is experimental and can only be used after setting the configuration parameter `allow_experimental_codecs=1`.
- DEFLATE_QPL only works if ClickHouse was compiled with support for AVX2 or AVX512 instructions.
- DEFLATE_QPL works best if the system has an Intel® IAA (In-Memory Analytics Accelerator) offloading device.
- DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with support for AVX2/AVX512.
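A hypothetical sketch of applying the codec (the table and column names are assumptions; it requires a build and hardware satisfying the limitations above):

``` sql
-- Experimental codecs must be enabled explicitly.
SET allow_experimental_codecs = 1;

CREATE TABLE logs
(
    message String CODEC(DEFLATE_QPL)
)
ENGINE = MergeTree
ORDER BY tuple();
```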
### Specialized Codecs
These codecs are designed to make compression more effective by using specific features of data. Some of these codecs do not compress data themselves. Instead, they prepare the data for a common purpose codec, which compresses it better than without this preparation.
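For instance, a preparatory codec is commonly chained with a general-purpose one (a sketch; the table and columns are illustrative):

``` sql
CREATE TABLE metrics
(
    ts    DateTime CODEC(Delta, ZSTD),  -- Delta prepares the data, ZSTD compresses it
    value Float64  CODEC(Gorilla)
)
ENGINE = MergeTree
ORDER BY ts;
```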

View File

@ -36,7 +36,7 @@ Additional join types available in ClickHouse:
- `LEFT ANY JOIN`, `RIGHT ANY JOIN` and `INNER ANY JOIN`, partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types.
- `ASOF JOIN` and `LEFT ASOF JOIN`, joining sequences with a non-exact match. `ASOF JOIN` usage is described below.
:::note
When [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported).
:::
@ -64,7 +64,7 @@ Rows are joined if the whole complex condition is met. If the conditions are not
The `OR` operator inside the `ON` clause works using the hash join algorithm — for each `OR` argument with join keys for `JOIN`, a separate hash table is created, so memory consumption and query execution time grow linearly with the number of `OR` expressions in the `ON` clause.
:::note
If a condition refers columns from different tables, then only the equality operator (`=`) is supported so far.
:::
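For example, each `OR` argument in the sketch below (with hypothetical tables `a` and `b`) gets its own hash table:

``` sql
SELECT a.id, b.id
FROM a
JOIN b ON a.key1 = b.key1 OR a.key2 = b.key2;
```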
@ -83,7 +83,7 @@ Consider `table_1` and `table_2`:
Query with one join key condition and an additional condition for `table_2`:
``` sql
SELECT name, text FROM table_1 LEFT OUTER JOIN table_2
ON table_1.Id = table_2.Id AND startsWith(table_2.text, 'Text');
```
@ -100,7 +100,7 @@ Note that the result contains the row with the name `C` and the empty text colum
Query with `INNER` type of a join and multiple conditions:
``` sql
SELECT name, text, scores FROM table_1 INNER JOIN table_2
ON table_1.Id = table_2.Id AND table_2.scores > 10 AND startsWith(table_2.text, 'Text');
```
@ -199,7 +199,7 @@ For example, consider the following tables:
`ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can't be joined.
:::note
`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
:::

View File

@ -18,7 +18,6 @@ sidebar_label: "Third-party libraries used
| aws-c-common | [Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) |
| aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) |
| aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) |
| base58 | [MIT](https://github.com/ClickHouse/base-x/blob/3e58874643c087f57e82b0ff03825c933fab945a/LICENSE) |
| base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) |
| boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) |
| boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) |

View File

@ -15,16 +15,15 @@ $ make
Generating the data:
:::danger "Attention"
:::warning "Attention"
-s 100 — dbgen generates 600 million rows (67 GB)
-s 1000 — dbgen generates 6 billion rows (takes a long time)
:::
``` bash
$ ./dbgen -s 1000 -T c
$ ./dbgen -s 1000 -T l
$ ./dbgen -s 1000 -T p
$ ./dbgen -s 1000 -T s
$ ./dbgen -s 1000 -T d
```
Creating tables in ClickHouse:
@ -105,11 +104,10 @@ $ clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
``` sql
SET max_memory_usage = 20000000000;
CREATE TABLE lineorder_flat
ENGINE = MergeTree
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
ENGINE = MergeTree ORDER BY (LO_ORDERDATE, LO_ORDERKEY)
AS SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,

View File

@ -19,6 +19,7 @@ ClickHouse can accept (`INSERT`) and return (`SELECT
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
| [CSV](#csv) | ✔ | ✔ |
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
@ -52,6 +53,7 @@ ClickHouse can accept (`INSERT`) and return (`SELECT
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✔ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNames](#rowbinarywithnames) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ |
@ -171,6 +173,12 @@ SELECT * FROM nestedt FORMAT TSV
During parsing, the first row must contain the column names. You can use the column names to specify their order, or to check their correctness.
:::warning
If the [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) setting is enabled,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) setting is enabled.
Otherwise, the first row will be skipped.
:::
This format is also available under the name `TSVWithNames`.
## TabSeparatedWithNamesAndTypes {#tabseparatedwithnamesandtypes}
@ -178,6 +186,14 @@ SELECT * FROM nestedt FORMAT TSV
Differs from the `TabSeparated` format in that the column names are written in the first row, and the column types in the second.
During parsing, the first and second rows are completely ignored.
:::warning
If the [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) setting is enabled,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) setting is enabled.
Otherwise, the first row will be skipped.
If the [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) setting is enabled,
the types from the input data will be compared with the types of the corresponding columns of the table. Otherwise, the second row will be skipped.
:::
This format is also available under the name `TSVWithNamesAndTypes`.
## Template {#format-template}
@ -374,6 +390,24 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
Also prints a header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
If the [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) setting is enabled,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) setting is enabled.
Otherwise, the first row will be skipped.
:::
## CSVWithNamesAndTypes {#csvwithnamesandtypes}
The column names are written in the first row and the column types in the second, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
If the [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) setting is enabled,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) setting is enabled.
Otherwise, the first row will be skipped.
If the [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) setting is enabled,
the types from the input data will be compared with the types of the corresponding columns of the table. Otherwise, the second row will be skipped.
:::
## CustomSeparated {#format-customseparated}
Similar to [Template](#format-template), but it prints (or reads) all column names and types, using the escaping rule from the [format_custom_escaping_rule](../operations/settings/settings.md#format-custom-escaping-rule) setting and the delimiters from the [format_custom_field_delimiter](../operations/settings/settings.md#format-custom-field-delimiter), [format_custom_row_before_delimiter](../operations/settings/settings.md#format-custom-row-before-delimiter), [format_custom_row_after_delimiter](../operations/settings/settings.md#format-custom-row-after-delimiter), [format_custom_row_between_delimiter](../operations/settings/settings.md#format-custom-row-between-delimiter), [format_custom_result_before_delimiter](../operations/settings/settings.md#format-custom-result-before-delimiter) and [format_custom_result_after_delimiter](../operations/settings/settings.md#format-custom-result-after-delimiter) settings, rather than from format strings.
@ -384,10 +418,24 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
Also prints a header row with column names, similar to the [TabSeparatedWithNames](#tabseparatedwithnames) format.
:::warning
If the [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) setting is enabled,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) setting is enabled.
Otherwise, the first row will be skipped.
:::
## CustomSeparatedWithNamesAndTypes {#customseparatedwithnamesandtypes}
Also prints two header rows with column names and types, similar to the [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) format.
:::warning
If the [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) setting is enabled,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) setting is enabled.
Otherwise, the first row will be skipped.
If the [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) setting is enabled,
the types from the input data will be compared with the types of the corresponding columns of the table. Otherwise, the second row will be skipped.
:::
## JSON {#json}
Outputs data in JSON format. Besides the table with data, it also outputs column names and types, along with some additional information: the total number of output rows, and the number of rows that could have been output if there were no LIMIT. Example:
@ -660,6 +708,14 @@ SELECT * FROM json_square_brackets;
Differs from `JSONCompactEachRow`/`JSONCompactStringsEachRow` in that the column names and types are written as the first two rows.
:::warning
If the [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) setting is enabled,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) setting is enabled.
Otherwise, the first row will be skipped.
If the [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) setting is enabled,
the types from the input data will be compared with the types of the corresponding columns of the table. Otherwise, the second row will be skipped.
:::
```json
["'hello'", "multiply(42, number)", "range(5)"]
["String", "UInt64", "Array(UInt8)"]
@ -904,6 +960,20 @@ Arrays are represented as a length in varint format (unsig
To support [NULL](../sql-reference/syntax.md#null-literal), a byte containing 1 or 0 is added before each [Nullable](../sql-reference/data-types/nullable.md) value. If the byte is 1, the value is NULL and this byte is interpreted as a separate value (i.e., it is followed by the value of the next field). If the byte is 0, the value of the field follows this byte (and is not NULL).
## RowBinaryWithNames {#rowbinarywithnames}
The same as [RowBinary](#rowbinary), but with an added header:
- The number of columns N, encoded as [LEB128](https://en.wikipedia.org/wiki/LEB128),
- N `String`s with the column names,
:::warning
If the [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) setting is enabled,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) setting is enabled.
Otherwise, the first row will be skipped.
:::
## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes}
The same as [RowBinary](#rowbinary), but with an added header:
@ -912,6 +982,14 @@ Arrays are represented as a length in varint format (unsig
- N `String`s with the column names,
- N `String`s with the column types.
:::warning
If the [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) setting is enabled,
the columns from the input data will be mapped to the columns of the table by their names. Columns with unknown names will be skipped if the [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) setting is enabled.
Otherwise, the first row will be skipped.
If the [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) setting is enabled,
the types from the input data will be compared with the types of the corresponding columns of the table. Otherwise, the second row will be skipped.
:::
## Values {#data-format-values}
Outputs every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates-with-times are output in quotes. Escaping rules and parsing details are similar to the [TabSeparated](#tabseparated) format. During formatting, no extra spaces are inserted; during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../sql-reference/syntax.md) is represented as `NULL`.

View File

@ -44,7 +44,7 @@ ClickHouse перезагружает встроенные словари с з
- `min_part_size` – The minimum size of a data part.
- `min_part_size_ratio` – The ratio of the minimum part size to the full size of the table.
- `method` – Compression method. Possible values: `lz4`, `lz4hc`, `zstd`.
- `method` – Compression method. Possible values: `lz4`, `lz4hc`, `zstd`, `deflate_qpl`.
- `level` – Compression level. See [Codecs](../../sql-reference/statements/create/table/#create-query-common-purpose-codecs).
You can configure multiple `<case>` sections.
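As a related sketch, the new `deflate_qpl` method corresponds to the `DEFLATE_QPL` column codec; this hypothetical example assumes a build with QPL support:

```sql
-- Hypothetical: compress a column with DEFLATE_QPL instead of the default codec.
CREATE TABLE qpl_example
(
    s String CODEC(DEFLATE_QPL)
)
ENGINE = MergeTree
ORDER BY tuple();
```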

View File

@ -527,7 +527,7 @@ SELECT * FROM table_with_enum_column_for_tsv_insert;
- [Usage of nested structures](../../interfaces/formats.md#jsoneachrow-nested) with the `JSONEachRow` format.
## input_format_with_names_use_header {#settings-input-format-with-names-use-header}
## input_format_with_names_use_header {#input_format_with_names_use_header}
Enables or disables checking the column order when inserting data.
@ -535,8 +535,38 @@ SELECT * FROM table_with_enum_column_for_tsv_insert;
Supported formats:
- [CSVWithNames](../../interfaces/formats.md#csvwithnames)
- [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames)
- [CSVWithNames](../../interfaces/formats.md#csvwithnames)
- [CSVWithNamesAndTypes](../../interfaces/formats.md#csvwithnamesandtypes)
- [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames)
- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md#tabseparatedwithnamesandtypes)
- [JSONCompactEachRowWithNames](../../interfaces/formats.md#jsoncompacteachrowwithnames)
- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompacteachrowwithnamesandtypes)
- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md#jsoncompactstringseachrowwithnames)
- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompactstringseachrowwithnamesandtypes)
- [RowBinaryWithNames](../../interfaces/formats.md#rowbinarywithnames)
- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes)
- [CustomSeparatedWithNames](../../interfaces/formats.md#customseparatedwithnames)
- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md#customseparatedwithnamesandtypes)
Possible values:
- 0 — disabled.
- 1 — enabled.
Default value: 1.
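A short sketch of the effect (the table `test_table` with columns `a` and `b` is hypothetical):

```sql
-- With the setting enabled, the header row drives the mapping, so the
-- input columns may come in a different order than in the table, and
-- unknown columns can be skipped via input_format_skip_unknown_fields.
INSERT INTO test_table
SETTINGS input_format_with_names_use_header = 1, input_format_skip_unknown_fields = 1
FORMAT CSVWithNames
"b","a","unknown_column"
"hello",1,"ignored"
```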
## input_format_with_types_use_header {#input_format_with_types_use_header}
Controls whether the format parser should check that the data types from the input data match the data types of the target table.
Supported formats:
- [CSVWithNamesAndTypes](../../interfaces/formats.md#csvwithnamesandtypes)
- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md#tabseparatedwithnamesandtypes)
- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompacteachrowwithnamesandtypes)
- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompactstringseachrowwithnamesandtypes)
- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes)
- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md#customseparatedwithnamesandtypes)
Possible values:
@ -626,8 +656,9 @@ ClickHouse может парсить только базовый формат `Y
Changes the behavior of operations executed with `ANY` strictness.
:::danger "Attention"
:::warning "Attention"
The setting applies only to `JOIN` operations performed over tables with the [Join](../../engines/table-engines/special/join.md) engine.
:::
Possible values:
@ -2082,8 +2113,9 @@ SELECT * FROM test_table
Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler takes these priorities into account when choosing the next thread to run on an available CPU core.
:::danger "Warning"
:::warning "Warning"
To use this setting, you need to set the `CAP_SYS_NICE` capability. The `clickhouse-server` package sets it up during installation. Some virtual environments do not allow you to set the `CAP_SYS_NICE` capability. In this case, `clickhouse-server` prints a message about it at startup.
:::
Possible values:

View File

@ -5,7 +5,7 @@ sidebar_label: AggregateFunction
# AggregateFunction {#data-type-aggregatefunction}
Aggregate functions can have an implementation-defined intermediate state that can be serialized to an `AggregateFunction(…)` data type and stored in a table, usually by means of a [materialized view](../../sql-reference/statements/create.md#create-view). To produce an intermediate state, aggregate functions with the `-State` suffix are usually used. To get the aggregated result later, the same aggregate functions must be used with the `-Merge` suffix.
Aggregate functions can have an implementation-defined intermediate state that can be serialized to an `AggregateFunction(…)` data type and stored in a table, usually by means of a [materialized view](../../sql-reference/statements/create/view.md). To produce an intermediate state, aggregate functions with the `-State` suffix are usually used. To get the aggregated result later, the same aggregate functions must be used with the `-Merge` suffix.
`AggregateFunction(name, types_of_arguments…)` is a parametric data type.
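A brief sketch of the `-State`/`-Merge` pattern (table and column names here are illustrative):

```sql
CREATE TABLE agg_states
(
    state AggregateFunction(uniq, UInt64)
)
ENGINE = AggregatingMergeTree
ORDER BY tuple();

-- Store the intermediate state of uniq() ...
INSERT INTO agg_states SELECT uniqState(number) FROM numbers(1000);

-- ... and finalize it later with the matching -Merge combinator.
SELECT uniqMerge(state) FROM agg_states;
```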
@ -63,5 +63,4 @@ SELECT uniqMerge(state) FROM (SELECT uniqState(UserID) AS state FROM table GROUP
## Usage Example {#primer-ispolzovaniia}
See the description of the [AggregatingMergeTree](../../sql-reference/data-types/aggregatefunction.md) engine.
See the description of the [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) engine.

View File

@ -542,6 +542,7 @@ SOURCE(MYSQL(
:::info "Note"
The `table` or `where` fields cannot be used together with the `query` field. Also, exactly one of the data sources must be specified: `table` or `query`.
There is no explicit `secure` parameter. Both cases are handled automatically: when an SSL connection is required and when it is not.
MySQL can be connected to on the local host via sockets; to do this, set `host` and `socket`.
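A hypothetical sketch of the socket-based connection mentioned above (the path and credentials are placeholders):

```sql
SOURCE(MYSQL(
    host 'localhost'
    socket '/var/run/mysqld/mysqld.sock'
    user 'default'
    password ''
    db 'test'
    table 'dict_source'
))
```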

View File

@ -490,22 +490,21 @@ SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY (key1, key2);
Returns the string s converted from the encoding from to the encoding to.
## Base58Encode(plaintext[, alphabet_name]), Base58Decode(plaintext[, alphabet_name]) {#base58}
## Base58Encode(plaintext), Base58Decode(encoded_text) {#base58}
Accepts a string or a column of strings and encodes/decodes them using the [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme with the specified alphabet.
Accepts a string or a column of strings and encodes/decodes them using the [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme with the standard Bitcoin alphabet.
**Syntax**
```sql
base58Encode(decoded[, alphabet_name])
base58Decode(encoded[, alphabet_name])
encodeBase58(decoded)
decodeBase58(encoded)
```
**Arguments**
- `decoded` — A column or a string of the [String](../../sql-reference/data-types/string.md) type.
- `encoded` — A column or a string of the [String](../../sql-reference/data-types/string.md) type. If the input string is not a valid encoding of some other string, exception `1001` is thrown.
- `alphabet_name` — A string constant. Specifies the alphabet used for encoding. Possible values: `gmp`, `bitcoin`, `ripple`, `flickr`. Default: `bitcoin`.
**Returned value**
@ -518,16 +517,16 @@ base58Decode(encoded[, alphabet_name])
Query:
``` sql
SELECT base58Encode('encode', 'flickr');
SELECT base58Decode('izCFiDUY', 'ripple');
SELECT encodeBase58('encode');
SELECT decodeBase58('izCFiDUY');
```
Result:
```text
┌─base58Encode('encode', 'flickr')─┐
┌─encodeBase58('encode')─┐
│ SvyTHb1D │
└──────────────────────────────────┘
┌─base58Decode('izCFiDUY', 'ripple')─┐
┌─decodeBase58('izCFiDUY')─┐
│ decode │
└────────────────────────────────────┘
```

View File

@ -85,7 +85,7 @@ SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res
## translate(s, from, to)
This function replaces characters in the string s according to the one-to-one character mapping defined by the strings from and to. from and to must be valid ASCII strings of the same size. Non-ASCII characters of the original string are not modified.
This function replaces characters in the string s according to the one-to-one character mapping defined by the strings from and to. from and to must be valid constant ASCII strings of the same size. Non-ASCII characters of the original string are not modified.
Example:
@ -101,7 +101,7 @@ SELECT translate('Hello, World!', 'delor', 'DELOR') AS res
## translateUTF8(string, from, to)
Similar to the previous function, but works with strings consisting of UTF-8 characters. from and to must be valid UTF-8 strings of the same size.
Similar to the previous function, but works with strings consisting of UTF-8 characters. from and to must be valid constant UTF-8 strings of the same size.
Example:
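For instance (an illustrative query; the input values are ours):

```sql
SELECT translateUTF8('Hôtel', 'ô', 'o') AS res;
-- res = 'Hotel'
```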

View File

@ -15,15 +15,14 @@ $ make
Start generating data:
!!! warning "Note"
:::warning "Note"
With `-s 100`, dbgen generates 600 million rows (67 GB); with `-s 1000`, it generates 6 billion rows (which takes a long time).
:::
```bash
$ ./dbgen -s 1000 -T c
$ ./dbgen -s 1000 -T l
$ ./dbgen -s 1000 -T p
$ ./dbgen -s 1000 -T s
$ ./dbgen -s 1000 -T d
```
Create the tables in ClickHouse:
@ -106,10 +105,8 @@ $ clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
SET max_memory_usage = 20000000000;
CREATE TABLE lineorder_flat
ENGINE = MergeTree
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
ENGINE = MergeTree ORDER BY (LO_ORDERDATE, LO_ORDERKEY)
AS SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,

View File

@ -18,7 +18,7 @@ option (ENABLE_CLICKHOUSE_SERVER "Server mode (main mode)" ${ENABLE_CLICKHOUSE_A
option (ENABLE_CLICKHOUSE_CLIENT "Client mode (interactive tui/shell that connects to the server)"
${ENABLE_CLICKHOUSE_ALL})
if (CLICKHOUSE_SPLIT_BINARY OR NOT ENABLE_UTILS)
if (CLICKHOUSE_SPLIT_BINARY)
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" OFF)
else ()
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" ON)
@ -434,6 +434,9 @@ else ()
endif ()
set (CLICKHOUSE_BUNDLE)
if (ENABLE_CLICKHOUSE_SELF_EXTRACTING)
list(APPEND CLICKHOUSE_BUNDLE self-extracting)
endif ()
if (ENABLE_CLICKHOUSE_SERVER)
add_custom_target (clickhouse-server ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-server DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-server" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)

View File

@ -79,6 +79,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
("block-size,b", po::value<unsigned>()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size")
("hc", "use LZ4HC instead of LZ4")
("zstd", "use ZSTD instead of LZ4")
("deflate_qpl", "use deflate_qpl instead of LZ4")
("codec", po::value<std::vector<std::string>>()->multitoken(), "use codecs combination instead of LZ4")
("level", po::value<int>(), "compression level for codecs specified via flags")
("none", "use no compression instead of LZ4")
@ -103,6 +104,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
bool decompress = options.count("decompress");
bool use_lz4hc = options.count("hc");
bool use_zstd = options.count("zstd");
bool use_deflate_qpl = options.count("deflate_qpl");
bool stat_mode = options.count("stat");
bool use_none = options.count("none");
unsigned block_size = options["block-size"].as<unsigned>();
@ -110,7 +112,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
if (options.count("codec"))
codecs = options["codec"].as<std::vector<std::string>>();
if ((use_lz4hc || use_zstd || use_none) && !codecs.empty())
if ((use_lz4hc || use_zstd || use_deflate_qpl || use_none) && !codecs.empty())
throw Exception("Wrong options, codec flags like --zstd and --codec options are mutually exclusive", ErrorCodes::BAD_ARGUMENTS);
if (!codecs.empty() && options.count("level"))
@ -122,6 +124,8 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
method_family = "LZ4HC";
else if (use_zstd)
method_family = "ZSTD";
else if (use_deflate_qpl)
method_family = "DEFLATE_QPL";
else if (use_none)
method_family = "NONE";

View File

@ -34,6 +34,10 @@
#include <base/bit_cast.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <memory>
#include <cmath>
#include <unistd.h>
@ -95,6 +99,9 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_SEEK_THROUGH_FILE;
extern const int UNKNOWN_FORMAT_VERSION;
extern const int INCORRECT_NUMBER_OF_COLUMNS;
extern const int TYPE_MISMATCH;
}
@ -115,6 +122,12 @@ public:
/// Deterministically change seed to some other value. This can be used to generate more values than were in source.
virtual void updateSeed() = 0;
/// Save into file. Binary, platform-dependent, version-dependent serialization.
virtual void serialize(WriteBuffer & out) const = 0;
/// Read from file
virtual void deserialize(ReadBuffer & in) = 0;
virtual ~IModel() = default;
};
@ -189,6 +202,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -230,6 +245,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -279,6 +296,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -311,6 +330,8 @@ class IdentityModel : public IModel
public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -395,6 +416,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -431,6 +454,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -469,6 +494,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -512,6 +539,26 @@ struct MarkovModelParameters
size_t frequency_add;
double frequency_desaturate;
size_t determinator_sliding_window_size;
void serialize(WriteBuffer & out) const
{
writeBinary(order, out);
writeBinary(frequency_cutoff, out);
writeBinary(num_buckets_cutoff, out);
writeBinary(frequency_add, out);
writeBinary(frequency_desaturate, out);
writeBinary(determinator_sliding_window_size, out);
}
void deserialize(ReadBuffer & in)
{
readBinary(order, in);
readBinary(frequency_cutoff, in);
readBinary(num_buckets_cutoff, in);
readBinary(frequency_add, in);
readBinary(frequency_desaturate, in);
readBinary(determinator_sliding_window_size, in);
}
};
@ -565,6 +612,39 @@ private:
return END;
}
void serialize(WriteBuffer & out) const
{
writeBinary(total, out);
writeBinary(count_end, out);
size_t size = buckets.size();
writeBinary(size, out);
for (const auto & elem : buckets)
{
writeBinary(elem.first, out);
writeBinary(elem.second, out);
}
}
void deserialize(ReadBuffer & in)
{
readBinary(total, in);
readBinary(count_end, in);
size_t size = 0;
readBinary(size, in);
buckets.reserve(size);
for (size_t i = 0; i < size; ++i)
{
Buckets::value_type elem;
readBinary(elem.first, in);
readBinary(elem.second, in);
buckets.emplace(std::move(elem));
}
}
};
using Table = HashMap<NGramHash, Histogram, TrivialHash>;
@ -621,6 +701,37 @@ public:
explicit MarkovModel(MarkovModelParameters params_)
: params(std::move(params_)), code_points(params.order, BEGIN) {}
void serialize(WriteBuffer & out) const
{
params.serialize(out);
size_t size = table.size();
writeBinary(size, out);
for (const auto & elem : table)
{
writeBinary(elem.getKey(), out);
elem.getMapped().serialize(out);
}
}
void deserialize(ReadBuffer & in)
{
params.deserialize(in);
size_t size = 0;
readBinary(size, in);
table.reserve(size);
for (size_t i = 0; i < size; ++i)
{
NGramHash key{};
readBinary(key, in);
Histogram & histogram = table[key];
histogram.deserialize(in);
}
}
void consume(const char * data, size_t size)
{
/// First 'order' number of code points are pre-filled with BEGIN.
@ -655,7 +766,6 @@ public:
}
}
void finalize()
{
if (params.num_buckets_cutoff)
@ -878,6 +988,16 @@ public:
{
seed = hash(seed);
}
void serialize(WriteBuffer & out) const override
{
markov_model.serialize(out);
}
void deserialize(ReadBuffer & in) override
{
markov_model.deserialize(in);
}
};
@ -916,6 +1036,16 @@ public:
{
nested_model->updateSeed();
}
void serialize(WriteBuffer & out) const override
{
nested_model->serialize(out);
}
void deserialize(ReadBuffer & in) override
{
nested_model->deserialize(in);
}
};
@ -954,6 +1084,16 @@ public:
{
nested_model->updateSeed();
}
void serialize(WriteBuffer & out) const override
{
nested_model->serialize(out);
}
void deserialize(ReadBuffer & in) override
{
nested_model->deserialize(in);
}
};
@ -1046,6 +1186,18 @@ public:
for (auto & model : models)
model->updateSeed();
}
void serialize(WriteBuffer & out) const
{
for (const auto & model : models)
model->serialize(out);
}
void deserialize(ReadBuffer & in)
{
for (auto & model : models)
model->deserialize(in);
}
};
}
@ -1068,8 +1220,10 @@ try
("input-format", po::value<std::string>(), "input format of the initial table data")
("output-format", po::value<std::string>(), "default output format")
("seed", po::value<std::string>(), "seed (arbitrary string), must be random string with at least 10 bytes length; note that a seed for each column is derived from this seed and a column name: you can obfuscate data for different tables and as long as you use identical seed and identical column names, the data for corresponding non-text columns for different tables will be transformed in the same way, so the data for different tables can be JOINed after obfuscation")
("limit", po::value<UInt64>(), "if specified - stop after generating that number of rows")
("limit", po::value<UInt64>(), "if specified - stop after generating that number of rows; the limit can be also greater than the number of source dataset - in this case it will process the dataset in a loop more than one time, using different seeds on every iteration, generating result as large as needed")
("silent", po::value<bool>()->default_value(false), "don't print information messages to stderr")
("save", po::value<std::string>(), "save the models after training to the specified file. You can use --limit 0 to skip the generation step. The file is using binary, platform-dependent, opaque serialization format. The model parameters are saved, while the seed is not.")
("load", po::value<std::string>(), "load the models instead of training from the specified file. The table structure must match the saved file. The seed should be specified separately, while other model parameters are loaded.")
("order", po::value<UInt64>()->default_value(5), "order of markov model to generate strings")
("frequency-cutoff", po::value<UInt64>()->default_value(5), "frequency cutoff for markov model: remove all buckets with count less than specified")
("num-buckets-cutoff", po::value<UInt64>()->default_value(0), "cutoff for number of different possible continuations for a context: remove all histograms with less than specified number of buckets")
@ -1096,12 +1250,26 @@ try
return 0;
}
if (options.count("save") && options.count("load"))
{
std::cerr << "The options --save and --load cannot be used together.\n";
return 1;
}
UInt64 seed = sipHash64(options["seed"].as<std::string>());
std::string structure = options["structure"].as<std::string>();
std::string input_format = options["input-format"].as<std::string>();
std::string output_format = options["output-format"].as<std::string>();
std::string load_from_file;
std::string save_into_file;
if (options.count("load"))
load_from_file = options["load"].as<std::string>();
else if (options.count("save"))
save_into_file = options["save"].as<std::string>();
UInt64 limit = 0;
if (options.count("limit"))
limit = options["limit"].as<UInt64>();
@ -1117,7 +1285,7 @@ try
markov_model_params.frequency_desaturate = options["frequency-desaturate"].as<double>();
markov_model_params.determinator_sliding_window_size = options["determinator-sliding-window-size"].as<UInt64>();
// Create header block
/// Create the header block
std::vector<std::string> structure_vals;
boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on);
@ -1143,6 +1311,7 @@ try
ReadBufferFromFileDescriptor file_in(STDIN_FILENO);
WriteBufferFromFileDescriptor file_out(STDOUT_FILENO);
if (load_from_file.empty())
{
/// stdin must be seekable
auto res = lseek(file_in.getFD(), 0, SEEK_SET);
@ -1156,6 +1325,9 @@ try
/// Train step
UInt64 source_rows = 0;
bool rewind_needed = false;
if (load_from_file.empty())
{
if (!silent)
std::cerr << "Training models\n";
@ -1173,11 +1345,71 @@ try
if (!silent)
std::cerr << "Processed " << source_rows << " rows\n";
}
obfuscator.finalize();
rewind_needed = true;
}
else
{
if (!silent)
std::cerr << "Loading models\n";
ReadBufferFromFile model_file_in(load_from_file);
CompressedReadBuffer model_in(model_file_in);
UInt8 version = 0;
readBinary(version, model_in);
if (version != 0)
throw Exception("Unknown version of the model file", ErrorCodes::UNKNOWN_FORMAT_VERSION);
readBinary(source_rows, model_in);
Names data_types = header.getDataTypeNames();
size_t header_size = 0;
readBinary(header_size, model_in);
if (header_size != data_types.size())
throw Exception("The saved model was created for different number of columns", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS);
for (size_t i = 0; i < header_size; ++i)
{
String type;
readBinary(type, model_in);
if (type != data_types[i])
throw Exception("The saved model was created for different types of columns", ErrorCodes::TYPE_MISMATCH);
}
obfuscator.deserialize(model_in);
}
obfuscator.finalize();
if (!save_into_file.empty())
{
if (!silent)
std::cerr << "Saving models\n";
if (!limit)
WriteBufferFromFile model_file_out(save_into_file);
CompressedWriteBuffer model_out(model_file_out, CompressionCodecFactory::instance().get("ZSTD", 1));
/// You can change the version on format changes; it is currently set to zero.
UInt8 version = 0;
writeBinary(version, model_out);
writeBinary(source_rows, model_out);
/// We are writing the data types for validation, because the models' serialization depends on the data types.
Names data_types = header.getDataTypeNames();
size_t header_size = data_types.size();
writeBinary(header_size, model_out);
for (const auto & type : data_types)
writeBinary(type, model_out);
/// Write the models.
obfuscator.serialize(model_out);
model_out.finalize();
model_file_out.finalize();
}
if (!options.count("limit"))
limit = source_rows;
/// Generation step
@ -1187,7 +1419,8 @@ try
if (!silent)
std::cerr << "Generating data\n";
file_in.seek(0, SEEK_SET);
if (rewind_needed)
file_in.rewind();
Pipe pipe(context->getInputFormat(input_format, file_in, header, max_block_size));
@ -1220,6 +1453,7 @@ try
out_executor.finish();
obfuscator.updateSeed();
rewind_needed = true;
}
return 0;

View File

@ -1,6 +1,18 @@
if (NOT(
CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME
AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR
)
)
set (COMPRESSOR "${CMAKE_BINARY_DIR}/native/utils/self-extracting-executable/pre_compressor")
set (DECOMPRESSOR "--decompressor=${CMAKE_BINARY_DIR}/utils/self-extracting-executable/decompressor")
else ()
set (COMPRESSOR "${CMAKE_BINARY_DIR}/utils/self-extracting-executable/compressor")
endif ()
add_custom_target (self-extracting ALL
${CMAKE_COMMAND} -E remove clickhouse
COMMAND ${CMAKE_BINARY_DIR}/utils/self-extracting-executable/compressor clickhouse ../clickhouse
COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse ../clickhouse
DEPENDS clickhouse compressor
)

View File

@ -509,7 +509,6 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
if (referrals)
{
SCOPE_EXIT({
// ldap_value_free(referrals);
ber_memvfree(reinterpret_cast<void **>(referrals));
referrals = nullptr;
});

View File

@ -13,20 +13,20 @@ using FileInfo = IBackupCoordination::FileInfo;
BackupCoordinationLocal::BackupCoordinationLocal() = default;
BackupCoordinationLocal::~BackupCoordinationLocal() = default;
void BackupCoordinationLocal::setStatus(const String &, const String &, const String &)
void BackupCoordinationLocal::setStage(const String &, const String &, const String &)
{
}
void BackupCoordinationLocal::setErrorStatus(const String &, const Exception &)
void BackupCoordinationLocal::setError(const String &, const Exception &)
{
}
Strings BackupCoordinationLocal::waitStatus(const Strings &, const String &)
Strings BackupCoordinationLocal::waitForStage(const Strings &, const String &)
{
return {};
}
Strings BackupCoordinationLocal::waitStatusFor(const Strings &, const String &, UInt64)
Strings BackupCoordinationLocal::waitForStage(const Strings &, const String &, std::chrono::milliseconds)
{
return {};
}

View File

@ -20,10 +20,10 @@ public:
BackupCoordinationLocal();
~BackupCoordinationLocal() override;
void setStatus(const String & current_host, const String & new_status, const String & message) override;
void setErrorStatus(const String & current_host, const Exception & exception) override;
Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) override;
Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) override;
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;

View File

@ -165,55 +165,94 @@ namespace
constexpr size_t NUM_ATTEMPTS = 10;
}
BackupCoordinationRemote::BackupCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
BackupCoordinationRemote::BackupCoordinationRemote(
const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, status_sync(zookeeper_path_ + "/status", get_zookeeper_, &Poco::Logger::get("BackupCoordination"))
, remove_zk_nodes_in_destructor(remove_zk_nodes_in_destructor_)
{
createRootNodes();
stage_sync.emplace(
zookeeper_path_ + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("BackupCoordination"));
}
BackupCoordinationRemote::~BackupCoordinationRemote() = default;
BackupCoordinationRemote::~BackupCoordinationRemote()
{
try
{
if (remove_zk_nodes_in_destructor)
removeAllNodes();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
zkutil::ZooKeeperPtr BackupCoordinationRemote::getZooKeeper() const
{
std::lock_guard lock{mutex};
return getZooKeeperNoLock();
}
zkutil::ZooKeeperPtr BackupCoordinationRemote::getZooKeeperNoLock() const
{
if (!zookeeper || zookeeper->expired())
{
zookeeper = get_zookeeper();
/// It's possible that we connected to different [Zoo]Keeper instance
/// so we may read a bit stale state.
zookeeper->sync(zookeeper_path);
}
return zookeeper;
}
void BackupCoordinationRemote::createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_part_names", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_mutations", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_data_paths", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_access", "");
zookeeper->createIfNotExists(zookeeper_path + "/file_names", "");
zookeeper->createIfNotExists(zookeeper_path + "/file_infos", "");
zookeeper->createIfNotExists(zookeeper_path + "/archive_suffixes", "");
auto zk = getZooKeeper();
zk->createAncestors(zookeeper_path);
zk->createIfNotExists(zookeeper_path, "");
zk->createIfNotExists(zookeeper_path + "/repl_part_names", "");
zk->createIfNotExists(zookeeper_path + "/repl_mutations", "");
zk->createIfNotExists(zookeeper_path + "/repl_data_paths", "");
zk->createIfNotExists(zookeeper_path + "/repl_access", "");
zk->createIfNotExists(zookeeper_path + "/file_names", "");
zk->createIfNotExists(zookeeper_path + "/file_infos", "");
zk->createIfNotExists(zookeeper_path + "/archive_suffixes", "");
}
void BackupCoordinationRemote::removeAllNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->removeRecursive(zookeeper_path);
/// Usually this function is called by the initiator when a backup is complete so we don't need the coordination anymore.
///
/// However there can be a rare situation when this function is called after an error occurs on the initiator of a query
/// while some hosts are still making the backup. Removing all the nodes will remove the parent node of the backup coordination
/// at `zookeeper_path`, which might cause such hosts to stop with a "ZNONODE" exception. Or such hosts might still do some useless part
/// of their backup work before that. Anyway, in this case the backup won't be finalized (because only an initiator can do that).
auto zk = getZooKeeper();
zk->removeRecursive(zookeeper_path);
}
void BackupCoordinationRemote::setStatus(const String & current_host, const String & new_status, const String & message)
void BackupCoordinationRemote::setStage(const String & current_host, const String & new_stage, const String & message)
{
status_sync.set(current_host, new_status, message);
stage_sync->set(current_host, new_stage, message);
}
void BackupCoordinationRemote::setErrorStatus(const String & current_host, const Exception & exception)
void BackupCoordinationRemote::setError(const String & current_host, const Exception & exception)
{
status_sync.setError(current_host, exception);
stage_sync->setError(current_host, exception);
}
Strings BackupCoordinationRemote::waitStatus(const Strings & all_hosts, const String & status_to_wait)
Strings BackupCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait)
{
return status_sync.wait(all_hosts, status_to_wait);
return stage_sync->wait(all_hosts, stage_to_wait);
}
Strings BackupCoordinationRemote::waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms)
Strings BackupCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return status_sync.waitFor(all_hosts, status_to_wait, timeout_ms);
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
}
@ -229,11 +268,11 @@ void BackupCoordinationRemote::addReplicatedPartNames(
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedPartNames() must not be called after preparing");
}
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_shared_id);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(replica_name);
zookeeper->create(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs), zkutil::CreateMode::Persistent);
zk->create(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs), zkutil::CreateMode::Persistent);
}
Strings BackupCoordinationRemote::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const
@ -255,11 +294,11 @@ void BackupCoordinationRemote::addReplicatedMutations(
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedMutations() must not be called after preparing");
}
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_mutations/" + escapeForFileName(table_shared_id);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(replica_name);
zookeeper->create(path, ReplicatedMutations::serialize(mutations, table_name_for_logs), zkutil::CreateMode::Persistent);
zk->create(path, ReplicatedMutations::serialize(mutations, table_name_for_logs), zkutil::CreateMode::Persistent);
}
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationRemote::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const
@ -279,11 +318,11 @@ void BackupCoordinationRemote::addReplicatedDataPath(
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedDataPath() must not be called after preparing");
}
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_shared_id);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(data_path);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
}
Strings BackupCoordinationRemote::getReplicatedDataPaths(const String & table_shared_id) const
@ -300,18 +339,18 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
return;
replicated_tables.emplace();
auto zookeeper = get_zookeeper();
auto zk = getZooKeeperNoLock();
{
String path = zookeeper_path + "/repl_part_names";
for (const String & escaped_table_shared_id : zookeeper->getChildren(path))
for (const String & escaped_table_shared_id : zk->getChildren(path))
{
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
String path2 = path + "/" + escaped_table_shared_id;
for (const String & escaped_replica_name : zookeeper->getChildren(path2))
for (const String & escaped_replica_name : zk->getChildren(path2))
{
String replica_name = unescapeForFileName(escaped_replica_name);
auto part_names = ReplicatedPartNames::deserialize(zookeeper->get(path2 + "/" + escaped_replica_name));
auto part_names = ReplicatedPartNames::deserialize(zk->get(path2 + "/" + escaped_replica_name));
replicated_tables->addPartNames(table_shared_id, part_names.table_name_for_logs, replica_name, part_names.part_names_and_checksums);
}
}
@ -319,14 +358,14 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
{
String path = zookeeper_path + "/repl_mutations";
for (const String & escaped_table_shared_id : zookeeper->getChildren(path))
for (const String & escaped_table_shared_id : zk->getChildren(path))
{
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
String path2 = path + "/" + escaped_table_shared_id;
for (const String & escaped_replica_name : zookeeper->getChildren(path2))
for (const String & escaped_replica_name : zk->getChildren(path2))
{
String replica_name = unescapeForFileName(escaped_replica_name);
auto mutations = ReplicatedMutations::deserialize(zookeeper->get(path2 + "/" + escaped_replica_name));
auto mutations = ReplicatedMutations::deserialize(zk->get(path2 + "/" + escaped_replica_name));
replicated_tables->addMutations(table_shared_id, mutations.table_name_for_logs, replica_name, mutations.mutations);
}
}
@ -334,11 +373,11 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
{
String path = zookeeper_path + "/repl_data_paths";
for (const String & escaped_table_shared_id : zookeeper->getChildren(path))
for (const String & escaped_table_shared_id : zk->getChildren(path))
{
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
String path2 = path + "/" + escaped_table_shared_id;
for (const String & escaped_data_path : zookeeper->getChildren(path2))
for (const String & escaped_data_path : zk->getChildren(path2))
{
String data_path = unescapeForFileName(escaped_data_path);
replicated_tables->addDataPath(table_shared_id, data_path);
@ -356,13 +395,13 @@ void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedAccessFilePath() must not be called after preparing");
}
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_access/" + escapeForFileName(access_zk_path);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + AccessEntityTypeInfo::get(access_entity_type).name;
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + host_id;
zookeeper->createIfNotExists(path, file_path);
zk->createIfNotExists(path, file_path);
}
Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const
@ -378,20 +417,20 @@ void BackupCoordinationRemote::prepareReplicatedAccess() const
return;
replicated_access.emplace();
auto zookeeper = get_zookeeper();
auto zk = getZooKeeperNoLock();
String path = zookeeper_path + "/repl_access";
for (const String & escaped_access_zk_path : zookeeper->getChildren(path))
for (const String & escaped_access_zk_path : zk->getChildren(path))
{
String access_zk_path = unescapeForFileName(escaped_access_zk_path);
String path2 = path + "/" + escaped_access_zk_path;
for (const String & type_str : zookeeper->getChildren(path2))
for (const String & type_str : zk->getChildren(path2))
{
AccessEntityType type = AccessEntityTypeInfo::parseType(type_str);
String path3 = path2 + "/" + type_str;
for (const String & host_id : zookeeper->getChildren(path3))
for (const String & host_id : zk->getChildren(path3))
{
String file_path = zookeeper->get(path3 + "/" + host_id);
String file_path = zk->get(path3 + "/" + host_id);
replicated_access->addFilePath(access_zk_path, type, host_id, file_path);
}
}
@ -401,11 +440,11 @@ void BackupCoordinationRemote::prepareReplicatedAccess() const
void BackupCoordinationRemote::addFileInfo(const FileInfo & file_info, bool & is_data_file_required)
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String full_path = zookeeper_path + "/file_names/" + escapeForFileName(file_info.file_name);
String size_and_checksum = serializeSizeAndChecksum(std::pair{file_info.size, file_info.checksum});
zookeeper->create(full_path, size_and_checksum, zkutil::CreateMode::Persistent);
zk->create(full_path, size_and_checksum, zkutil::CreateMode::Persistent);
if (!file_info.size)
{
@ -414,7 +453,7 @@ void BackupCoordinationRemote::addFileInfo(const FileInfo & file_info, bool & is
}
full_path = zookeeper_path + "/file_infos/" + size_and_checksum;
auto code = zookeeper->tryCreate(full_path, serializeFileInfo(file_info), zkutil::CreateMode::Persistent);
auto code = zk->tryCreate(full_path, serializeFileInfo(file_info), zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, full_path);
@ -426,15 +465,15 @@ void BackupCoordinationRemote::updateFileInfo(const FileInfo & file_info)
if (!file_info.size)
return; /// we don't keep FileInfos for empty files, nothing to update
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String size_and_checksum = serializeSizeAndChecksum(std::pair{file_info.size, file_info.checksum});
String full_path = zookeeper_path + "/file_infos/" + size_and_checksum;
for (size_t attempt = 0; attempt < NUM_ATTEMPTS; ++attempt)
{
Coordination::Stat stat;
auto new_info = deserializeFileInfo(zookeeper->get(full_path, &stat));
auto new_info = deserializeFileInfo(zk->get(full_path, &stat));
new_info.archive_suffix = file_info.archive_suffix;
auto code = zookeeper->trySet(full_path, serializeFileInfo(new_info), stat.version);
auto code = zk->trySet(full_path, serializeFileInfo(new_info), stat.version);
if (code == Coordination::Error::ZOK)
return;
bool is_last_attempt = (attempt == NUM_ATTEMPTS - 1);
@ -445,16 +484,16 @@ void BackupCoordinationRemote::updateFileInfo(const FileInfo & file_info)
std::vector<FileInfo> BackupCoordinationRemote::getAllFileInfos() const
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
std::vector<FileInfo> file_infos;
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
Strings escaped_names = zk->getChildren(zookeeper_path + "/file_names");
for (const String & escaped_name : escaped_names)
{
String size_and_checksum = zookeeper->get(zookeeper_path + "/file_names/" + escaped_name);
String size_and_checksum = zk->get(zookeeper_path + "/file_names/" + escaped_name);
UInt64 size = deserializeSizeAndChecksum(size_and_checksum).first;
FileInfo file_info;
if (size) /// we don't keep FileInfos for empty files
file_info = deserializeFileInfo(zookeeper->get(zookeeper_path + "/file_infos/" + size_and_checksum));
file_info = deserializeFileInfo(zk->get(zookeeper_path + "/file_infos/" + size_and_checksum));
file_info.file_name = unescapeForFileName(escaped_name);
file_infos.emplace_back(std::move(file_info));
}
@ -463,8 +502,8 @@ std::vector<FileInfo> BackupCoordinationRemote::getAllFileInfos() const
Strings BackupCoordinationRemote::listFiles(const String & directory, bool recursive) const
{
auto zookeeper = get_zookeeper();
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
auto zk = getZooKeeper();
Strings escaped_names = zk->getChildren(zookeeper_path + "/file_names");
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
@ -496,8 +535,8 @@ Strings BackupCoordinationRemote::listFiles(const String & directory, bool recur
bool BackupCoordinationRemote::hasFiles(const String & directory) const
{
auto zookeeper = get_zookeeper();
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
auto zk = getZooKeeper();
Strings escaped_names = zk->getChildren(zookeeper_path + "/file_names");
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
@ -515,42 +554,42 @@ bool BackupCoordinationRemote::hasFiles(const String & directory) const
std::optional<FileInfo> BackupCoordinationRemote::getFileInfo(const String & file_name) const
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String size_and_checksum;
if (!zookeeper->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum))
if (!zk->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum))
return std::nullopt;
UInt64 size = deserializeSizeAndChecksum(size_and_checksum).first;
FileInfo file_info;
if (size) /// we don't keep FileInfos for empty files
file_info = deserializeFileInfo(zookeeper->get(zookeeper_path + "/file_infos/" + size_and_checksum));
file_info = deserializeFileInfo(zk->get(zookeeper_path + "/file_infos/" + size_and_checksum));
file_info.file_name = file_name;
return file_info;
}
std::optional<FileInfo> BackupCoordinationRemote::getFileInfo(const SizeAndChecksum & size_and_checksum) const
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String file_info_str;
if (!zookeeper->tryGet(zookeeper_path + "/file_infos/" + serializeSizeAndChecksum(size_and_checksum), file_info_str))
if (!zk->tryGet(zookeeper_path + "/file_infos/" + serializeSizeAndChecksum(size_and_checksum), file_info_str))
return std::nullopt;
return deserializeFileInfo(file_info_str);
}
std::optional<SizeAndChecksum> BackupCoordinationRemote::getFileSizeAndChecksum(const String & file_name) const
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String size_and_checksum;
if (!zookeeper->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum))
if (!zk->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum))
return std::nullopt;
return deserializeSizeAndChecksum(size_and_checksum);
}
String BackupCoordinationRemote::getNextArchiveSuffix()
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/archive_suffixes/a";
String path_created;
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::PersistentSequential, path_created);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::PersistentSequential, path_created);
if (code != Coordination::Error::ZOK)
throw zkutil::KeeperException(code, path);
return formatArchiveSuffix(extractCounterFromSequentialNodeName(path_created));
@ -558,16 +597,11 @@ String BackupCoordinationRemote::getNextArchiveSuffix()
Strings BackupCoordinationRemote::getAllArchiveSuffixes() const
{
auto zookeeper = get_zookeeper();
Strings node_names = zookeeper->getChildren(zookeeper_path + "/archive_suffixes");
auto zk = getZooKeeper();
Strings node_names = zk->getChildren(zookeeper_path + "/archive_suffixes");
for (auto & node_name : node_names)
node_name = formatArchiveSuffix(extractCounterFromSequentialNodeName(node_name));
return node_names;
}
void BackupCoordinationRemote::drop()
{
removeAllNodes();
}
}

View File

@ -3,7 +3,7 @@
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupCoordinationReplicatedAccess.h>
#include <Backups/BackupCoordinationReplicatedTables.h>
#include <Backups/BackupCoordinationStatusSync.h>
#include <Backups/BackupCoordinationStageSync.h>
namespace DB
@ -13,13 +13,13 @@ namespace DB
class BackupCoordinationRemote : public IBackupCoordination
{
public:
BackupCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_);
BackupCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_);
~BackupCoordinationRemote() override;
void setStatus(const String & current_host, const String & new_status, const String & message) override;
void setErrorStatus(const String & current_host, const Exception & exception) override;
Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) override;
Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) override;
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void addReplicatedPartNames(
const String & table_shared_id,
@ -56,9 +56,9 @@ public:
String getNextArchiveSuffix() override;
Strings getAllArchiveSuffixes() const override;
void drop() override;
private:
zkutil::ZooKeeperPtr getZooKeeper() const;
zkutil::ZooKeeperPtr getZooKeeperNoLock() const;
void createRootNodes();
void removeAllNodes();
void prepareReplicatedTables() const;
@ -66,10 +66,12 @@ private:
const String zookeeper_path;
const zkutil::GetZooKeeper get_zookeeper;
const bool remove_zk_nodes_in_destructor;
BackupCoordinationStatusSync status_sync;
std::optional<BackupCoordinationStageSync> stage_sync;
mutable std::mutex mutex;
mutable zkutil::ZooKeeperPtr zookeeper;
mutable std::optional<BackupCoordinationReplicatedTables> replicated_tables;
mutable std::optional<BackupCoordinationReplicatedAccess> replicated_access;
};

View File

@ -0,0 +1,13 @@
#include <Backups/BackupCoordinationStage.h>
#include <fmt/format.h>
namespace DB
{
String BackupCoordinationStage::formatGatheringMetadata(size_t pass)
{
return fmt::format("{} ({})", GATHERING_METADATA, pass);
}
}

View File

@ -0,0 +1,41 @@
#pragma once
#include <base/types.h>
namespace DB
{
namespace BackupCoordinationStage
{
/// Finding all tables and databases which we're going to put to the backup and collecting their metadata.
constexpr const char * GATHERING_METADATA = "gathering metadata";
String formatGatheringMetadata(size_t pass);
/// Making temporary hard links and prepare backup entries.
constexpr const char * EXTRACTING_DATA_FROM_TABLES = "extracting data from tables";
/// Running special tasks for replicated tables which can also prepare some backup entries.
constexpr const char * RUNNING_POST_TASKS = "running post-tasks";
/// Writing backup entries to the backup and removing temporary hard links.
constexpr const char * WRITING_BACKUP = "writing backup";
/// Finding databases and tables in the backup which we're going to restore.
constexpr const char * FINDING_TABLES_IN_BACKUP = "finding tables in backup";
/// Creating databases or finding them and checking their definitions.
constexpr const char * CREATING_DATABASES = "creating databases";
/// Creating tables or finding them and checking their definition.
constexpr const char * CREATING_TABLES = "creating tables";
/// Inserting restored data to tables.
constexpr const char * INSERTING_DATA_TO_TABLES = "inserting data to tables";
/// Coordination stage meaning that a host finished its work.
constexpr const char * COMPLETED = "completed";
}
}

View File

@ -0,0 +1,201 @@
#include <Backups/BackupCoordinationStageSync.h>
#include <Common/Exception.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <base/chrono_io.h>
namespace DB
{
namespace ErrorCodes
{
extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
}
BackupCoordinationStageSync::BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, log(log_)
{
createRootNodes();
}
void BackupCoordinationStageSync::createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
}
void BackupCoordinationStageSync::set(const String & current_host, const String & new_stage, const String & message)
{
auto zookeeper = get_zookeeper();
/// Make an ephemeral node so the initiator can track if the current host is still working.
String alive_node_path = zookeeper_path + "/alive|" + current_host;
auto code = zookeeper->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNODEEXISTS)
throw zkutil::KeeperException(code, alive_node_path);
zookeeper->createIfNotExists(zookeeper_path + "/started|" + current_host, "");
zookeeper->create(zookeeper_path + "/current|" + current_host + "|" + new_stage, message, zkutil::CreateMode::Persistent);
}
void BackupCoordinationStageSync::setError(const String & current_host, const Exception & exception)
{
auto zookeeper = get_zookeeper();
WriteBufferFromOwnString buf;
writeStringBinary(current_host, buf);
writeException(exception, buf, true);
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
}
Strings BackupCoordinationStageSync::wait(const Strings & all_hosts, const String & stage_to_wait)
{
return waitImpl(all_hosts, stage_to_wait, {});
}
Strings BackupCoordinationStageSync::waitFor(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return waitImpl(all_hosts, stage_to_wait, timeout);
}
namespace
{
struct UnreadyHostState
{
bool started = false;
bool alive = false;
};
}
struct BackupCoordinationStageSync::State
{
Strings results;
std::map<String, UnreadyHostState> unready_hosts;
std::optional<std::pair<String, Exception>> error;
std::optional<String> host_terminated;
};
BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState(
zkutil::ZooKeeperPtr zookeeper, const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const
{
std::unordered_set<std::string_view> zk_nodes_set{zk_nodes.begin(), zk_nodes.end()};
State state;
if (zk_nodes_set.contains("error"))
{
ReadBufferFromOwnString buf{zookeeper->get(zookeeper_path + "/error")};
String host;
readStringBinary(host, buf);
state.error = std::make_pair(host, readException(buf, fmt::format("Got error from {}", host)));
return state;
}
for (const auto & host : all_hosts)
{
if (!zk_nodes_set.contains("current|" + host + "|" + stage_to_wait))
{
UnreadyHostState unready_host_state;
unready_host_state.started = zk_nodes_set.contains("started|" + host);
unready_host_state.alive = zk_nodes_set.contains("alive|" + host);
state.unready_hosts.emplace(host, unready_host_state);
if (!unready_host_state.alive && unready_host_state.started && !state.host_terminated)
state.host_terminated = host;
}
}
if (state.host_terminated || !state.unready_hosts.empty())
return state;
state.results.reserve(all_hosts.size());
for (const auto & host : all_hosts)
state.results.emplace_back(zookeeper->get(zookeeper_path + "/current|" + host + "|" + stage_to_wait));
return state;
}
Strings BackupCoordinationStageSync::waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const
{
if (all_hosts.empty())
return {};
/// Wait until all hosts are ready, an error happens, or the time runs out.
auto zookeeper = get_zookeeper();
/// Set by ZooKeeper when the list of zk nodes has changed.
auto watch = std::make_shared<Poco::Event>();
bool use_timeout = timeout.has_value();
std::chrono::steady_clock::time_point end_of_timeout;
if (use_timeout)
end_of_timeout = std::chrono::steady_clock::now() + std::chrono::duration_cast<std::chrono::steady_clock::duration>(*timeout);
State state;
String previous_unready_host; /// Used for logging: we don't want to log the same unready host again.
for (;;)
{
/// Get zk nodes and subscribe on their changes.
Strings zk_nodes = zookeeper->getChildren(zookeeper_path, nullptr, watch);
/// Read and analyze the current state of zk nodes.
state = readCurrentState(zookeeper, zk_nodes, all_hosts, stage_to_wait);
if (state.error || state.host_terminated || state.unready_hosts.empty())
break; /// Error happened or everything is ready.
/// Log that we will wait for another host.
const auto & unready_host = state.unready_hosts.begin()->first;
if (unready_host != previous_unready_host)
{
LOG_TRACE(log, "Waiting for host {}", unready_host);
previous_unready_host = unready_host;
}
/// Wait until `watch_callback` is called by ZooKeeper meaning that zk nodes have changed.
{
if (use_timeout)
{
auto current_time = std::chrono::steady_clock::now();
if ((current_time > end_of_timeout)
|| !watch->tryWait(std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - current_time).count()))
break;
}
else
{
watch->wait();
}
}
}
/// Rethrow an error raised originally on another host.
if (state.error)
state.error->second.rethrow();
/// Another host terminated without errors.
if (state.host_terminated)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Host {} suddenly stopped working", *state.host_terminated);
/// Something's unready, timeout is probably not enough.
if (!state.unready_hosts.empty())
{
const auto & [unready_host, unready_host_state] = *state.unready_hosts.begin();
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Waited for host {} too long (> {}){}",
unready_host,
to_string(*timeout),
unready_host_state.started ? "" : ": Operation didn't start");
}
return state.results;
}
}

View File

@ -0,0 +1,39 @@
#pragma once
#include <Common/ZooKeeper/Common.h>
namespace DB
{
/// Used to coordinate hosts so that all hosts reach a specific stage at around the same time.
class BackupCoordinationStageSync
{
public:
BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_);
/// Sets the stage of the current host and signals other hosts if they were waiting for it.
void set(const String & current_host, const String & new_stage, const String & message);
void setError(const String & current_host, const Exception & exception);
/// Waits until all the specified hosts come to the specified stage.
/// The function returns the messages all hosts set when they came to the required stage.
Strings wait(const Strings & all_hosts, const String & stage_to_wait);
/// Almost the same as wait() but this one stops waiting and throws an exception after a specific amount of time.
Strings waitFor(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout);
private:
void createRootNodes();
struct State;
State readCurrentState(zkutil::ZooKeeperPtr zookeeper, const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const;
Strings waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const;
String zookeeper_path;
zkutil::GetZooKeeper get_zookeeper;
Poco::Logger * log;
};
}
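A hedged usage sketch of this API follows; the ZooKeeper path, host IDs and stage name are hypothetical, and the zkutil::GetZooKeeper getter is assumed to be supplied by the caller:
#include <Backups/BackupCoordinationStageSync.h>
#include <chrono>
/// Sketch only: the path, host IDs and stage below are illustrative assumptions.
DB::Strings syncExample(zkutil::GetZooKeeper get_zookeeper)
{
    DB::BackupCoordinationStageSync sync(
        "/clickhouse/backups/backup-1234/stage",   /// assumed coordination path
        get_zookeeper,
        &Poco::Logger::get("BackupCoordination"));
    /// Announce this host's stage, then wait (up to 3 minutes) for both hosts to reach it.
    sync.set("host1:9000", "writing backup", "");
    return sync.waitFor({"host1:9000", "host2:9000"}, "writing backup", std::chrono::milliseconds{180000});
}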

View File

@ -1,182 +0,0 @@
#include <Backups/BackupCoordinationStatusSync.h>
#include <Common/Exception.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <base/chrono_io.h>
namespace DB
{
namespace ErrorCodes
{
extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
}
BackupCoordinationStatusSync::BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, log(log_)
{
createRootNodes();
}
void BackupCoordinationStatusSync::createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
}
void BackupCoordinationStatusSync::set(const String & current_host, const String & new_status, const String & message)
{
auto zookeeper = get_zookeeper();
zookeeper->createIfNotExists(zookeeper_path + "/" + current_host + "|" + new_status, message);
}
void BackupCoordinationStatusSync::setError(const String & current_host, const Exception & exception)
{
auto zookeeper = get_zookeeper();
Exception exception2 = exception;
exception2.addMessage("Host {}", current_host);
WriteBufferFromOwnString buf;
writeException(exception2, buf, true);
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
}
Strings BackupCoordinationStatusSync::wait(const Strings & all_hosts, const String & status_to_wait)
{
return waitImpl(all_hosts, status_to_wait, {});
}
Strings BackupCoordinationStatusSync::waitFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms)
{
return waitImpl(all_hosts, status_to_wait, timeout_ms);
}
Strings BackupCoordinationStatusSync::waitImpl(const Strings & all_hosts, const String & status_to_wait, std::optional<UInt64> timeout_ms)
{
if (all_hosts.empty())
return {};
/// Wait for other hosts.
Strings ready_hosts_results;
ready_hosts_results.resize(all_hosts.size());
std::map<String, std::vector<size_t> /* index in `ready_hosts_results` */> unready_hosts;
for (size_t i = 0; i != all_hosts.size(); ++i)
unready_hosts[all_hosts[i]].push_back(i);
std::optional<Exception> error;
auto zookeeper = get_zookeeper();
/// Processes ZooKeeper's nodes and updates `ready_hosts_results`, `unready_hosts`, or `error`.
auto process_zk_nodes = [&](const Strings & zk_nodes)
{
for (const String & zk_node : zk_nodes)
{
if (zk_node.starts_with("remove_watch-"))
continue;
if (zk_node == "error")
{
ReadBufferFromOwnString buf{zookeeper->get(zookeeper_path + "/error")};
error = readException(buf, "", true);
break;
}
size_t separator_pos = zk_node.find('|');
if (separator_pos == String::npos)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected zk node {}", zookeeper_path + "/" + zk_node);
String host = zk_node.substr(0, separator_pos);
String status = zk_node.substr(separator_pos + 1);
auto it = unready_hosts.find(host);
if ((it != unready_hosts.end()) && (status == status_to_wait))
{
String result = zookeeper->get(zookeeper_path + "/" + zk_node);
for (size_t i : it->second)
ready_hosts_results[i] = result;
unready_hosts.erase(it);
}
}
};
/// Wait until all hosts are ready, an error happens, or the timeout expires.
std::atomic<bool> watch_set = false;
std::condition_variable watch_triggered_event;
auto watch_callback = [&](const Coordination::WatchResponse &)
{
watch_set = false; /// After it's triggered it's not set until we call getChildrenWatch() again.
watch_triggered_event.notify_all();
};
auto watch_triggered = [&] { return !watch_set; };
bool use_timeout = timeout_ms.has_value();
std::chrono::milliseconds timeout{timeout_ms.value_or(0)};
std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();
std::chrono::steady_clock::duration elapsed;
std::mutex dummy_mutex;
String previous_unready_host;
while (!unready_hosts.empty() && !error)
{
watch_set = true;
Strings nodes = zookeeper->getChildrenWatch(zookeeper_path, nullptr, watch_callback);
process_zk_nodes(nodes);
if (!unready_hosts.empty() && !error)
{
const auto & unready_host = unready_hosts.begin()->first;
if (unready_host != previous_unready_host)
{
LOG_TRACE(log, "Waiting for host {}", unready_host);
previous_unready_host = unready_host;
}
std::unique_lock dummy_lock{dummy_mutex};
if (use_timeout)
{
elapsed = std::chrono::steady_clock::now() - start_time;
if ((elapsed > timeout) || !watch_triggered_event.wait_for(dummy_lock, timeout - elapsed, watch_triggered))
break;
}
else
watch_triggered_event.wait(dummy_lock, watch_triggered);
}
}
if (watch_set)
{
/// Remove watch by triggering it.
zookeeper->create(zookeeper_path + "/remove_watch-", "", zkutil::CreateMode::EphemeralSequential);
std::unique_lock dummy_lock{dummy_mutex};
watch_triggered_event.wait(dummy_lock, watch_triggered);
}
if (error)
error->rethrow();
if (!unready_hosts.empty())
{
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Waited for host {} too long ({})",
unready_hosts.begin()->first,
to_string(elapsed));
}
return ready_hosts_results;
}
}
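The removed wait loop above couples an atomic flag, a condition_variable and a dummy mutex to wait for the ZooKeeper watch; a minimal self-contained sketch of that pattern (no ZooKeeper involved):
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>
int main()
{
    std::atomic<bool> watch_set = true;
    std::condition_variable watch_triggered_event;
    std::mutex dummy_mutex; /// protects nothing; needed only for the wait
    auto watch_triggered = [&] { return !watch_set; };
    /// Simulates the ZooKeeper watch callback firing on another thread.
    std::thread watcher([&]
    {
        watch_set = false;
        watch_triggered_event.notify_all();
    });
    std::unique_lock dummy_lock{dummy_mutex};
    watch_triggered_event.wait(dummy_lock, watch_triggered);
    watcher.join();
}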

View File

@ -1,37 +0,0 @@
#pragma once
#include <Common/ZooKeeper/Common.h>
namespace DB
{
/// Used to coordinate hosts, so that all of them reach a specific status at around the same time.
class BackupCoordinationStatusSync
{
public:
BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_);
/// Sets the status of the current host and signals other hosts if they were waiting for that.
void set(const String & current_host, const String & new_status, const String & message);
void setError(const String & current_host, const Exception & exception);
/// Waits until all the specified hosts come to the specified status.
/// The function returns the messages all hosts set when they came to the required status.
Strings wait(const Strings & all_hosts, const String & status_to_wait);
/// Almost the same as wait() but this one stops waiting and throws an exception after a specific amount of time.
Strings waitFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms);
static constexpr const char * kErrorStatus = "error";
private:
void createRootNodes();
Strings waitImpl(const Strings & all_hosts, const String & status_to_wait, std::optional<UInt64> timeout_ms);
String zookeeper_path;
zkutil::GetZooKeeper get_zookeeper;
Poco::Logger * log;
};
}

View File

@ -1,6 +1,7 @@
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupCoordinationStage.h>
#include <Backups/BackupUtils.h>
#include <Backups/DDLAdjustingForBackupVisitor.h>
#include <Databases/IDatabase.h>
@ -31,20 +32,11 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
namespace Stage = BackupCoordinationStage;
namespace
{
/// Finding all tables and databases which we're going to put into the backup and collecting their metadata.
constexpr const char * kGatheringMetadataStatus = "gathering metadata";
/// Making temporary hard links and prepare backup entries.
constexpr const char * kExtractingDataFromTablesStatus = "extracting data from tables";
/// Running special tasks for replicated tables which can also prepare some backup entries.
constexpr const char * kRunningPostTasksStatus = "running post-tasks";
/// Writing backup entries to the backup and removing temporary hard links.
constexpr const char * kWritingBackupStatus = "writing backup";
/// Uppercases the first character of a passed string.
String toUpperFirst(const String & str)
{
@ -90,7 +82,8 @@ BackupEntriesCollector::BackupEntriesCollector(
, backup_settings(backup_settings_)
, backup_coordination(backup_coordination_)
, context(context_)
, consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 300000))
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
, consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000))
, log(&Poco::Logger::get("BackupEntriesCollector"))
{
}
@ -100,7 +93,7 @@ BackupEntriesCollector::~BackupEntriesCollector() = default;
BackupEntries BackupEntriesCollector::run()
{
/// run() can be called only once.
if (!current_status.empty())
if (!current_stage.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Already making backup entries");
/// Find other hosts working along with us to execute this ON CLUSTER query.
@ -123,36 +116,40 @@ BackupEntries BackupEntriesCollector::run()
makeBackupEntriesForTablesDefs();
/// Make backup entries for the data of the found tables.
setStatus(kExtractingDataFromTablesStatus);
setStage(Stage::EXTRACTING_DATA_FROM_TABLES);
makeBackupEntriesForTablesData();
/// Run all the tasks added with addPostCollectingTask().
setStatus(kRunningPostTasksStatus);
setStage(Stage::RUNNING_POST_TASKS);
runPostTasks();
/// No more backup entries or tasks are allowed after this point.
setStatus(kWritingBackupStatus);
setStage(Stage::WRITING_BACKUP);
return std::move(backup_entries);
}
Strings BackupEntriesCollector::setStatus(const String & new_status, const String & message)
Strings BackupEntriesCollector::setStage(const String & new_stage, const String & message)
{
LOG_TRACE(log, "{}", toUpperFirst(new_status));
current_status = new_status;
LOG_TRACE(log, "{}", toUpperFirst(new_stage));
current_stage = new_stage;
backup_coordination->setStatus(backup_settings.host_id, new_status, message);
backup_coordination->setStage(backup_settings.host_id, new_stage, message);
if (new_status.starts_with(kGatheringMetadataStatus))
if (new_stage == Stage::formatGatheringMetadata(1))
{
auto now = std::chrono::steady_clock::now();
auto end_of_timeout = std::max(now, consistent_metadata_snapshot_start_time + consistent_metadata_snapshot_timeout);
return backup_coordination->waitStatusFor(
all_hosts, new_status, std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - now).count());
return backup_coordination->waitForStage(all_hosts, new_stage, on_cluster_first_sync_timeout);
}
else if (new_stage.starts_with(Stage::GATHERING_METADATA))
{
auto current_time = std::chrono::steady_clock::now();
auto end_of_timeout = std::max(current_time, consistent_metadata_snapshot_end_time);
return backup_coordination->waitForStage(
all_hosts, new_stage, std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - current_time));
}
else
{
return backup_coordination->waitStatus(all_hosts, new_status);
return backup_coordination->waitForStage(all_hosts, new_stage);
}
}
@ -173,18 +170,18 @@ void BackupEntriesCollector::calculateRootPathInBackup()
/// Finds databases and tables which we will put into the backup.
void BackupEntriesCollector::gatherMetadataAndCheckConsistency()
{
consistent_metadata_snapshot_start_time = std::chrono::steady_clock::now();
auto end_of_timeout = consistent_metadata_snapshot_start_time + consistent_metadata_snapshot_timeout;
setStatus(fmt::format("{} ({})", kGatheringMetadataStatus, 1));
setStage(Stage::formatGatheringMetadata(1));
consistent_metadata_snapshot_end_time = std::chrono::steady_clock::now() + consistent_metadata_snapshot_timeout;
for (size_t pass = 1;; ++pass)
{
String new_status = fmt::format("{} ({})", kGatheringMetadataStatus, pass + 1);
String next_stage = Stage::formatGatheringMetadata(pass + 1);
std::optional<Exception> inconsistency_error;
if (tryGatherMetadataAndCompareWithPrevious(inconsistency_error))
{
/// Gathered metadata and checked consistency, cool! But we have to check that other hosts coped with that too.
auto all_hosts_results = setStatus(new_status, "consistent");
auto all_hosts_results = setStage(next_stage, "consistent");
std::optional<String> host_with_inconsistency;
std::optional<String> inconsistency_error_on_other_host;
@ -210,13 +207,13 @@ void BackupEntriesCollector::gatherMetadataAndCheckConsistency()
else
{
/// Failed to gather metadata or something wasn't consistent. We'll let other hosts know that and try again.
setStatus(new_status, inconsistency_error->displayText());
setStage(next_stage, inconsistency_error->displayText());
}
/// Two passes are the minimum (we need to compare table names with the previous ones to be sure we don't miss anything).
if (pass >= 2)
{
if (std::chrono::steady_clock::now() > end_of_timeout)
if (std::chrono::steady_clock::now() > consistent_metadata_snapshot_end_time)
inconsistency_error->rethrow();
else
LOG_WARNING(log, "{}", inconsistency_error->displayText());
@ -239,6 +236,7 @@ bool BackupEntriesCollector::tryGatherMetadataAndCompareWithPrevious(std::option
table_infos.clear();
gatherDatabasesMetadata();
gatherTablesMetadata();
lockTablesForReading();
}
catch (Exception & e)
{
@ -526,12 +524,11 @@ void BackupEntriesCollector::lockTablesForReading()
for (auto & [table_name, table_info] : table_infos)
{
auto storage = table_info.storage;
TableLockHolder table_lock;
if (storage)
{
try
{
table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
table_info.table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
}
catch (Exception & e)
{
@ -712,7 +709,7 @@ void BackupEntriesCollector::makeBackupEntriesForTableData(const QualifiedTableN
void BackupEntriesCollector::addBackupEntry(const String & file_name, BackupEntryPtr backup_entry)
{
if (current_status == kWritingBackupStatus)
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed");
backup_entries.emplace_back(file_name, backup_entry);
}
@ -724,21 +721,21 @@ void BackupEntriesCollector::addBackupEntry(const std::pair<String, BackupEntryP
void BackupEntriesCollector::addBackupEntries(const BackupEntries & backup_entries_)
{
if (current_status == kWritingBackupStatus)
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of backup entries is not allowed");
insertAtEnd(backup_entries, backup_entries_);
}
void BackupEntriesCollector::addBackupEntries(BackupEntries && backup_entries_)
{
if (current_status == kWritingBackupStatus)
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of backup entries is not allowed");
insertAtEnd(backup_entries, std::move(backup_entries_));
}
void BackupEntriesCollector::addPostTask(std::function<void()> task)
{
if (current_status == kWritingBackupStatus)
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of post tasks is not allowed");
post_tasks.push(std::move(task));
}
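Stage::formatGatheringMetadata() itself is not shown in this diff; judging from the old fmt::format("{} ({})", kGatheringMetadataStatus, pass) calls it replaces above, a plausible shape is the following (an assumption, not the actual implementation):
/// Assumed shape, inferred from the old fmt::format calls this diff replaces.
String formatGatheringMetadata(size_t pass)
{
    return fmt::format("{} ({})", GATHERING_METADATA, pass);
}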

View File

@ -86,12 +86,13 @@ private:
void runPostTasks();
Strings setStatus(const String & new_status, const String & message = "");
Strings setStage(const String & new_stage, const String & message = "");
const ASTBackupQuery::Elements backup_query_elements;
const BackupSettings backup_settings;
std::shared_ptr<IBackupCoordination> backup_coordination;
ContextPtr context;
std::chrono::milliseconds on_cluster_first_sync_timeout;
std::chrono::milliseconds consistent_metadata_snapshot_timeout;
Poco::Logger * log;
@ -129,8 +130,8 @@ private:
std::optional<ASTs> partitions;
};
String current_status;
std::chrono::steady_clock::time_point consistent_metadata_snapshot_start_time;
String current_stage;
std::chrono::steady_clock::time_point consistent_metadata_snapshot_end_time;
std::unordered_map<String, DatabaseInfo> database_infos;
std::unordered_map<QualifiedTableName, TableInfo> table_infos;
std::vector<std::pair<String, String>> previous_databases_metadata;

View File

@ -5,6 +5,7 @@
#include <Backups/BackupUtils.h>
#include <Backups/IBackupEntry.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupCoordinationStage.h>
#include <Backups/BackupCoordinationRemote.h>
#include <Backups/BackupCoordinationLocal.h>
#include <Backups/RestoreCoordinationRemote.h>
@ -18,7 +19,6 @@
#include <Common/Exception.h>
#include <Common/Macros.h>
#include <Common/logger_useful.h>
#include <Common/scope_guard_safe.h>
#include <Common/setThreadName.h>
@ -30,25 +30,79 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
namespace Stage = BackupCoordinationStage;
namespace
{
/// Coordination status meaning that a host finished its work.
constexpr const char * kCompletedCoordinationStatus = "completed";
/// Sends information about the current exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendErrorToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host)
std::shared_ptr<IBackupCoordination> makeBackupCoordination(const String & coordination_zk_path, const ContextPtr & context, bool is_internal_backup)
{
if (!coordination_zk_path.empty())
{
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
return std::make_shared<BackupCoordinationRemote>(coordination_zk_path, get_zookeeper, !is_internal_backup);
}
else
{
return std::make_shared<BackupCoordinationLocal>();
}
}
std::shared_ptr<IRestoreCoordination> makeRestoreCoordination(const String & coordination_zk_path, const ContextPtr & context, bool is_internal_backup)
{
if (!coordination_zk_path.empty())
{
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
return std::make_shared<RestoreCoordinationRemote>(coordination_zk_path, get_zookeeper, !is_internal_backup);
}
else
{
return std::make_shared<RestoreCoordinationLocal>();
}
}
/// Sends information about an exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host, const Exception & exception)
{
if (!coordination)
return;
try
{
coordination->setErrorStatus(current_host, Exception{getCurrentExceptionCode(), getCurrentExceptionMessage(true, true)});
if (coordination)
coordination->setError(current_host, exception);
}
catch (...)
{
}
}
/// Sends information about the current exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendCurrentExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host)
{
try
{
throw;
}
catch (const Exception & e)
{
sendExceptionToCoordination(coordination, current_host, e);
}
catch (...)
{
coordination->setError(current_host, Exception{getCurrentExceptionCode(), getCurrentExceptionMessage(true, true)});
}
}
/// Used to change num_active_backups.
size_t getNumActiveBackupsChange(BackupStatus status)
{
return status == BackupStatus::MAKING_BACKUP;
}
/// Used to change num_active_restores.
size_t getNumActiveRestoresChange(BackupStatus status)
{
return status == BackupStatus::RESTORING;
}
}
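These two helpers return a bool widened to size_t, so a status transition can be applied to the counters as a single delta (as done in setStatus() further below); a standalone sketch of the arithmetic:
#include <atomic>
#include <cstddef>
#include <iostream>
enum class BackupStatus { MAKING_BACKUP, BACKUP_COMPLETE };
size_t getNumActiveBackupsChange(BackupStatus status)
{
    return status == BackupStatus::MAKING_BACKUP; /// bool widens to 0 or 1
}
int main()
{
    std::atomic<size_t> num_active_backups{1}; /// one backup currently in flight
    auto old_status = BackupStatus::MAKING_BACKUP;
    auto new_status = BackupStatus::BACKUP_COMPLETE;
    /// 0 - 1 wraps in unsigned arithmetic, but adding it decrements the counter correctly.
    num_active_backups += getNumActiveBackupsChange(new_status) - getNumActiveBackupsChange(old_status);
    std::cout << num_active_backups << '\n'; /// prints 0
}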
@ -60,7 +114,8 @@ BackupsWorker::BackupsWorker(size_t num_backup_threads, size_t num_restore_threa
/// We set max_free_threads = 0 because we don't want to keep any threads if there is no BACKUP or RESTORE query running right now.
}
UUID BackupsWorker::start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context)
std::pair<UUID, bool> BackupsWorker::start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context)
{
const ASTBackupQuery & backup_query = typeid_cast<const ASTBackupQuery &>(*backup_or_restore_query);
if (backup_query.kind == ASTBackupQuery::Kind::BACKUP)
@ -70,316 +125,402 @@ UUID BackupsWorker::start(const ASTPtr & backup_or_restore_query, ContextMutable
}
UUID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & context)
std::pair<UUID, bool> BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & context)
{
auto backup_query = std::static_pointer_cast<ASTBackupQuery>(query->clone());
auto backup_settings = BackupSettings::fromBackupQuery(*backup_query);
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
bool on_cluster = !backup_query->cluster.empty();
if (!backup_settings.backup_uuid)
backup_settings.backup_uuid = UUIDHelpers::generateV4();
UUID backup_uuid = *backup_settings.backup_uuid;
/// Prepare context to use.
ContextPtr context_in_use = context;
ContextMutablePtr mutable_context;
if (on_cluster || backup_settings.async)
std::shared_ptr<IBackupCoordination> backup_coordination;
if (backup_settings.internal)
{
/// For ON CLUSTER queries we will need to change some settings.
/// For ASYNC queries we have to clone the context anyway.
context_in_use = mutable_context = Context::createCopy(context);
/// The following call of makeBackupCoordination() is not essential because doBackup() will later create a backup coordination
/// if it's not created here. However, it's better to create the coordination here anyway: this way, if an exception
/// is thrown in startMakingBackup(), other hosts will know about it.
backup_coordination = makeBackupCoordination(backup_settings.coordination_zk_path, context, backup_settings.internal);
}
addInfo(backup_uuid, backup_info.toString(), BackupStatus::MAKING_BACKUP, backup_settings.internal);
try
{
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
addInfo(backup_uuid, backup_settings.internal, backup_info.toString(), BackupStatus::MAKING_BACKUP);
auto job = [this,
/// Prepare context to use.
ContextPtr context_in_use = context;
ContextMutablePtr mutable_context;
bool on_cluster = !backup_query->cluster.empty();
if (on_cluster || backup_settings.async)
{
/// For ON CLUSTER queries we will need to change some settings.
/// For ASYNC queries we have to clone the context anyway.
context_in_use = mutable_context = Context::createCopy(context);
}
if (backup_settings.async)
{
backups_thread_pool.scheduleOrThrowOnError(
[this, backup_uuid, backup_query, backup_settings, backup_info, backup_coordination, context_in_use, mutable_context] {
doBackup(
backup_uuid,
backup_query,
backup_settings,
backup_info,
backup_coordination,
context_in_use,
mutable_context,
/* called_async= */ true);
});
}
else
{
doBackup(
backup_uuid,
backup_query,
backup_settings,
backup_info,
on_cluster,
backup_coordination,
context_in_use,
mutable_context](bool async) mutable
mutable_context,
/* called_async= */ false);
}
return {backup_uuid, backup_settings.internal};
}
catch (...)
{
std::optional<CurrentThread::QueryScope> query_scope;
std::shared_ptr<IBackupCoordination> backup_coordination;
SCOPE_EXIT_SAFE(if (backup_coordination && !backup_settings.internal) backup_coordination->drop(););
try
{
if (async)
{
query_scope.emplace(mutable_context);
setThreadName("BackupWorker");
}
/// Checks access rights if this is not an ON CLUSTER query.
/// (If this is an ON CLUSTER query, executeDDLQueryOnCluster() will check access rights later.)
auto required_access = getRequiredAccessToBackup(backup_query->elements);
if (!on_cluster)
context_in_use->checkAccess(required_access);
ClusterPtr cluster;
if (on_cluster)
{
backup_query->cluster = context_in_use->getMacros()->expand(backup_query->cluster);
cluster = context_in_use->getCluster(backup_query->cluster);
backup_settings.cluster_host_ids = cluster->getHostIDs();
if (backup_settings.coordination_zk_path.empty())
{
String root_zk_path = context_in_use->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_settings.coordination_zk_path = root_zk_path + "/backup-" + toString(backup_uuid);
}
}
/// Make a backup coordination.
if (!backup_settings.coordination_zk_path.empty())
{
backup_coordination = std::make_shared<BackupCoordinationRemote>(
backup_settings.coordination_zk_path,
[global_context = context_in_use->getGlobalContext()] { return global_context->getZooKeeper(); });
}
else
{
backup_coordination = std::make_shared<BackupCoordinationLocal>();
}
/// Opens a backup for writing.
BackupFactory::CreateParams backup_create_params;
backup_create_params.open_mode = IBackup::OpenMode::WRITE;
backup_create_params.context = context_in_use;
backup_create_params.backup_info = backup_info;
backup_create_params.base_backup_info = backup_settings.base_backup_info;
backup_create_params.compression_method = backup_settings.compression_method;
backup_create_params.compression_level = backup_settings.compression_level;
backup_create_params.password = backup_settings.password;
backup_create_params.is_internal_backup = backup_settings.internal;
backup_create_params.backup_coordination = backup_coordination;
backup_create_params.backup_uuid = backup_uuid;
BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params);
/// Write the backup.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = backup_settings.shard_num;
params.only_replica_num = backup_settings.replica_num;
params.access_to_check = required_access;
backup_settings.copySettingsToQuery(*backup_query);
// executeDDLQueryOnCluster() will return without waiting for completion
mutable_context->setSetting("distributed_ddl_task_timeout", Field{0});
mutable_context->setSetting("distributed_ddl_output_mode", Field{"none"});
executeDDLQueryOnCluster(backup_query, mutable_context, params);
/// Wait until all the hosts have written their backup entries.
auto all_hosts = BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
backup_coordination->waitStatus(all_hosts, kCompletedCoordinationStatus);
}
else
{
backup_query->setCurrentDatabase(context_in_use->getCurrentDatabase());
/// Prepare backup entries.
BackupEntries backup_entries;
{
BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context_in_use};
backup_entries = backup_entries_collector.run();
}
/// Write the backup entries to the backup.
writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool);
/// We have written our backup entries, we need to tell other hosts (they could be waiting for it).
backup_coordination->setStatus(backup_settings.host_id, kCompletedCoordinationStatus, "");
}
/// Finalize backup (write its metadata).
if (!backup_settings.internal)
backup->finalizeWriting();
/// Close the backup.
backup.reset();
setStatus(backup_uuid, BackupStatus::BACKUP_COMPLETE);
}
catch (...)
{
/// Something bad happened, the backup was not built.
setStatus(backup_uuid, BackupStatus::FAILED_TO_BACKUP);
sendErrorToCoordination(backup_coordination, backup_settings.host_id);
if (!async)
throw;
}
};
if (backup_settings.async)
backups_thread_pool.scheduleOrThrowOnError([job]() mutable { job(true); });
else
job(false);
return backup_uuid;
/// Something bad happened, the backup was not built.
setStatus(backup_uuid, backup_settings.internal, BackupStatus::FAILED_TO_BACKUP);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
throw;
}
}
UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr context)
void BackupsWorker::doBackup(
const UUID & backup_uuid,
const std::shared_ptr<ASTBackupQuery> & backup_query,
BackupSettings backup_settings,
const BackupInfo & backup_info,
std::shared_ptr<IBackupCoordination> backup_coordination,
const ContextPtr & context,
ContextMutablePtr mutable_context,
bool called_async)
{
std::optional<CurrentThread::QueryScope> query_scope;
try
{
if (called_async)
{
query_scope.emplace(mutable_context);
setThreadName("BackupWorker");
}
bool on_cluster = !backup_query->cluster.empty();
assert(mutable_context || (!on_cluster && !called_async));
/// Checks access rights if this is not an ON CLUSTER query.
/// (If this is an ON CLUSTER query, executeDDLQueryOnCluster() will check access rights later.)
auto required_access = getRequiredAccessToBackup(backup_query->elements);
if (!on_cluster)
context->checkAccess(required_access);
ClusterPtr cluster;
if (on_cluster)
{
backup_query->cluster = context->getMacros()->expand(backup_query->cluster);
cluster = context->getCluster(backup_query->cluster);
backup_settings.cluster_host_ids = cluster->getHostIDs();
if (backup_settings.coordination_zk_path.empty())
{
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_settings.coordination_zk_path = root_zk_path + "/backup-" + toString(backup_uuid);
}
}
/// Make a backup coordination.
if (!backup_coordination)
backup_coordination = makeBackupCoordination(backup_settings.coordination_zk_path, context, backup_settings.internal);
/// Opens a backup for writing.
BackupFactory::CreateParams backup_create_params;
backup_create_params.open_mode = IBackup::OpenMode::WRITE;
backup_create_params.context = context;
backup_create_params.backup_info = backup_info;
backup_create_params.base_backup_info = backup_settings.base_backup_info;
backup_create_params.compression_method = backup_settings.compression_method;
backup_create_params.compression_level = backup_settings.compression_level;
backup_create_params.password = backup_settings.password;
backup_create_params.is_internal_backup = backup_settings.internal;
backup_create_params.backup_coordination = backup_coordination;
backup_create_params.backup_uuid = backup_uuid;
BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params);
/// Write the backup.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = backup_settings.shard_num;
params.only_replica_num = backup_settings.replica_num;
params.access_to_check = required_access;
backup_settings.copySettingsToQuery(*backup_query);
// executeDDLQueryOnCluster() will return without waiting for completion
mutable_context->setSetting("distributed_ddl_task_timeout", Field{0});
mutable_context->setSetting("distributed_ddl_output_mode", Field{"none"});
executeDDLQueryOnCluster(backup_query, mutable_context, params);
/// Wait until all the hosts have written their backup entries.
auto all_hosts = BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
backup_coordination->waitForStage(all_hosts, Stage::COMPLETED);
}
else
{
backup_query->setCurrentDatabase(context->getCurrentDatabase());
/// Prepare backup entries.
BackupEntries backup_entries;
{
BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context};
backup_entries = backup_entries_collector.run();
}
/// Write the backup entries to the backup.
writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool);
/// We have written our backup entries, we need to tell other hosts (they could be waiting for it).
backup_coordination->setStage(backup_settings.host_id, Stage::COMPLETED, "");
}
/// Finalize backup (write its metadata).
if (!backup_settings.internal)
backup->finalizeWriting();
/// Close the backup.
backup.reset();
LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_info.toString());
setStatus(backup_uuid, backup_settings.internal, BackupStatus::BACKUP_COMPLETE);
}
catch (...)
{
/// Something bad happened, the backup was not built.
if (called_async)
{
tryLogCurrentException(log, fmt::format("Failed to make {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
setStatus(backup_uuid, backup_settings.internal, BackupStatus::FAILED_TO_BACKUP);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
}
else
{
/// setStatus() and sendCurrentExceptionToCoordination() will be called by startMakingBackup().
throw;
}
}
}
std::pair<UUID, bool> BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr context)
{
UUID restore_uuid = UUIDHelpers::generateV4();
auto restore_query = std::static_pointer_cast<ASTBackupQuery>(query->clone());
auto restore_settings = RestoreSettings::fromRestoreQuery(*restore_query);
auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
bool on_cluster = !restore_query->cluster.empty();
UUID restore_uuid = UUIDHelpers::generateV4();
/// Prepare context to use.
ContextMutablePtr context_in_use = context;
if (restore_settings.async || on_cluster)
std::shared_ptr<IRestoreCoordination> restore_coordination;
if (restore_settings.internal)
{
/// For ON CLUSTER queries we will need to change some settings.
/// For ASYNC queries we have to clone the context anyway.
context_in_use = Context::createCopy(context);
/// The following call of makeRestoreCoordination() is not essential because doRestore() will later create a restore coordination
/// if it's not created here. However, it's better to create the coordination here anyway: this way, if an exception
/// is thrown in startRestoring(), other hosts will know about it.
restore_coordination = makeRestoreCoordination(restore_settings.coordination_zk_path, context, restore_settings.internal);
}
addInfo(restore_uuid, backup_info.toString(), BackupStatus::RESTORING, restore_settings.internal);
try
{
auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
addInfo(restore_uuid, restore_settings.internal, backup_info.toString(), BackupStatus::RESTORING);
auto job = [this,
/// Prepare context to use.
ContextMutablePtr context_in_use = context;
bool on_cluster = !restore_query->cluster.empty();
if (restore_settings.async || on_cluster)
{
/// For ON CLUSTER queries we will need to change some settings.
/// For ASYNC queries we have to clone the context anyway.
context_in_use = Context::createCopy(context);
}
if (restore_settings.async)
{
backups_thread_pool.scheduleOrThrowOnError(
[this, restore_uuid, restore_query, restore_settings, backup_info, restore_coordination, context_in_use] {
doRestore(
restore_uuid,
restore_query,
restore_settings,
backup_info,
restore_coordination,
context_in_use,
/* called_async= */ true);
});
}
else
{
doRestore(
restore_uuid,
restore_query,
restore_settings,
backup_info,
on_cluster,
context_in_use](bool async) mutable
restore_coordination,
context_in_use,
/* called_async= */ false);
}
return {restore_uuid, restore_settings.internal};
}
catch (...)
{
std::optional<CurrentThread::QueryScope> query_scope;
std::shared_ptr<IRestoreCoordination> restore_coordination;
SCOPE_EXIT_SAFE(if (restore_coordination && !restore_settings.internal) restore_coordination->drop(););
try
{
if (async)
{
query_scope.emplace(context_in_use);
setThreadName("RestoreWorker");
}
/// Open the backup for reading.
BackupFactory::CreateParams backup_open_params;
backup_open_params.open_mode = IBackup::OpenMode::READ;
backup_open_params.context = context_in_use;
backup_open_params.backup_info = backup_info;
backup_open_params.base_backup_info = restore_settings.base_backup_info;
backup_open_params.password = restore_settings.password;
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
String current_database = context_in_use->getCurrentDatabase();
/// Checks access rights if this is an ON CLUSTER query.
/// (If this isn't an ON CLUSTER query, RestorerFromBackup will check access rights later.)
ClusterPtr cluster;
if (on_cluster)
{
restore_query->cluster = context_in_use->getMacros()->expand(restore_query->cluster);
cluster = context_in_use->getCluster(restore_query->cluster);
restore_settings.cluster_host_ids = cluster->getHostIDs();
/// We cannot just use the access checking provided by executeDDLQueryOnCluster(): it would be incorrect
/// because different replicas can contain different sets of tables, so the required access rights can differ too.
/// So the right way is to pass through the entire cluster and check access for each host.
auto addresses = cluster->filterAddressesByShardOrReplica(restore_settings.shard_num, restore_settings.replica_num);
for (const auto * address : addresses)
{
restore_settings.host_id = address->toString();
auto restore_elements = restore_query->elements;
String addr_database = address->default_database.empty() ? current_database : address->default_database;
for (auto & element : restore_elements)
element.setCurrentDatabase(addr_database);
RestorerFromBackup dummy_restorer{restore_elements, restore_settings, nullptr, backup, context_in_use};
dummy_restorer.run(RestorerFromBackup::CHECK_ACCESS_ONLY);
}
}
/// Make a restore coordination.
if (on_cluster && restore_settings.coordination_zk_path.empty())
{
String root_zk_path = context_in_use->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
restore_settings.coordination_zk_path = root_zk_path + "/restore-" + toString(restore_uuid);
}
if (!restore_settings.coordination_zk_path.empty())
{
restore_coordination = std::make_shared<RestoreCoordinationRemote>(
restore_settings.coordination_zk_path,
[global_context = context_in_use->getGlobalContext()] { return global_context->getZooKeeper(); });
}
else
{
restore_coordination = std::make_shared<RestoreCoordinationLocal>();
}
/// Do RESTORE.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = restore_settings.shard_num;
params.only_replica_num = restore_settings.replica_num;
restore_settings.copySettingsToQuery(*restore_query);
// executeDDLQueryOnCluster() will return without waiting for completion
context_in_use->setSetting("distributed_ddl_task_timeout", Field{0});
context_in_use->setSetting("distributed_ddl_output_mode", Field{"none"});
executeDDLQueryOnCluster(restore_query, context_in_use, params);
/// Wait until all the hosts have finished their restore work.
auto all_hosts = BackupSettings::Util::filterHostIDs(
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
restore_coordination->waitStatus(all_hosts, kCompletedCoordinationStatus);
}
else
{
restore_query->setCurrentDatabase(current_database);
/// Restore metadata and prepare data restoring tasks.
DataRestoreTasks data_restore_tasks;
{
RestorerFromBackup restorer{restore_query->elements, restore_settings, restore_coordination,
backup, context_in_use};
data_restore_tasks = restorer.run(RestorerFromBackup::RESTORE);
}
/// Execute the data restoring tasks.
restoreTablesData(std::move(data_restore_tasks), restores_thread_pool);
/// We have restored everything, we need to tell other hosts (they could be waiting for it).
restore_coordination->setStatus(restore_settings.host_id, kCompletedCoordinationStatus, "");
}
setStatus(restore_uuid, BackupStatus::RESTORED);
}
catch (...)
{
/// Something bad happened, the restore did not complete.
setStatus(restore_uuid, BackupStatus::FAILED_TO_RESTORE);
sendErrorToCoordination(restore_coordination, restore_settings.host_id);
if (!async)
throw;
}
};
if (restore_settings.async)
backups_thread_pool.scheduleOrThrowOnError([job]() mutable { job(true); });
else
job(false);
return restore_uuid;
/// Something bad happened, the restore did not complete.
setStatus(restore_uuid, restore_settings.internal, BackupStatus::FAILED_TO_RESTORE);
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
throw;
}
}
void BackupsWorker::addInfo(const UUID & uuid, const String & backup_name, BackupStatus status, bool internal)
void BackupsWorker::doRestore(
const UUID & restore_uuid,
const std::shared_ptr<ASTBackupQuery> & restore_query,
RestoreSettings restore_settings,
const BackupInfo & backup_info,
std::shared_ptr<IRestoreCoordination> restore_coordination,
ContextMutablePtr context,
bool called_async)
{
std::optional<CurrentThread::QueryScope> query_scope;
try
{
if (called_async)
{
query_scope.emplace(context);
setThreadName("RestoreWorker");
}
/// Open the backup for reading.
BackupFactory::CreateParams backup_open_params;
backup_open_params.open_mode = IBackup::OpenMode::READ;
backup_open_params.context = context;
backup_open_params.backup_info = backup_info;
backup_open_params.base_backup_info = restore_settings.base_backup_info;
backup_open_params.password = restore_settings.password;
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
String current_database = context->getCurrentDatabase();
/// Checks access rights if this is an ON CLUSTER query.
/// (If this isn't an ON CLUSTER query, RestorerFromBackup will check access rights later.)
ClusterPtr cluster;
bool on_cluster = !restore_query->cluster.empty();
if (on_cluster)
{
restore_query->cluster = context->getMacros()->expand(restore_query->cluster);
cluster = context->getCluster(restore_query->cluster);
restore_settings.cluster_host_ids = cluster->getHostIDs();
/// We cannot just use the access checking provided by executeDDLQueryOnCluster(): it would be incorrect
/// because different replicas can contain different sets of tables, so the required access rights can differ too.
/// So the right way is to pass through the entire cluster and check access for each host.
auto addresses = cluster->filterAddressesByShardOrReplica(restore_settings.shard_num, restore_settings.replica_num);
for (const auto * address : addresses)
{
restore_settings.host_id = address->toString();
auto restore_elements = restore_query->elements;
String addr_database = address->default_database.empty() ? current_database : address->default_database;
for (auto & element : restore_elements)
element.setCurrentDatabase(addr_database);
RestorerFromBackup dummy_restorer{restore_elements, restore_settings, nullptr, backup, context};
dummy_restorer.run(RestorerFromBackup::CHECK_ACCESS_ONLY);
}
}
/// Make a restore coordination.
if (on_cluster && restore_settings.coordination_zk_path.empty())
{
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
restore_settings.coordination_zk_path = root_zk_path + "/restore-" + toString(restore_uuid);
}
if (!restore_coordination)
restore_coordination = makeRestoreCoordination(restore_settings.coordination_zk_path, context, restore_settings.internal);
/// Do RESTORE.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = restore_settings.shard_num;
params.only_replica_num = restore_settings.replica_num;
restore_settings.copySettingsToQuery(*restore_query);
// executeDDLQueryOnCluster() will return without waiting for completion
context->setSetting("distributed_ddl_task_timeout", Field{0});
context->setSetting("distributed_ddl_output_mode", Field{"none"});
executeDDLQueryOnCluster(restore_query, context, params);
/// Wait until all the hosts have finished their restore work.
auto all_hosts = BackupSettings::Util::filterHostIDs(
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
restore_coordination->waitForStage(all_hosts, Stage::COMPLETED);
}
else
{
restore_query->setCurrentDatabase(current_database);
/// Restore metadata and prepare data restoring tasks.
DataRestoreTasks data_restore_tasks;
{
RestorerFromBackup restorer{restore_query->elements, restore_settings, restore_coordination,
backup, context};
data_restore_tasks = restorer.run(RestorerFromBackup::RESTORE);
}
/// Execute the data restoring tasks.
restoreTablesData(std::move(data_restore_tasks), restores_thread_pool);
/// We have restored everything, we need to tell other hosts (they could be waiting for it).
restore_coordination->setStage(restore_settings.host_id, Stage::COMPLETED, "");
}
LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_info.toString());
setStatus(restore_uuid, restore_settings.internal, BackupStatus::RESTORED);
}
catch (...)
{
/// Something bad happened, the restore did not complete.
if (called_async)
{
tryLogCurrentException(log, fmt::format("Failed to restore from {} {}", (restore_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
setStatus(restore_uuid, restore_settings.internal, BackupStatus::FAILED_TO_RESTORE);
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
}
else
{
/// setStatus() and sendCurrentExceptionToCoordination() will be called by startRestoring().
throw;
}
}
}
void BackupsWorker::addInfo(const UUID & uuid, bool internal, const String & backup_name, BackupStatus status)
{
Info info;
info.uuid = uuid;
@ -387,48 +528,41 @@ void BackupsWorker::addInfo(const UUID & uuid, const String & backup_name, Backu
info.status = status;
info.status_changed_time = time(nullptr);
info.internal = internal;
std::lock_guard lock{infos_mutex};
infos[uuid] = std::move(info);
bool inserted = infos.try_emplace({uuid, internal}, std::move(info)).second;
if (!inserted)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Pair of UUID={} and internal={} is already in use", uuid, internal);
num_active_backups += getNumActiveBackupsChange(status);
num_active_restores += getNumActiveRestoresChange(status);
}
void BackupsWorker::setStatus(const UUID & uuid, BackupStatus status)
void BackupsWorker::setStatus(const UUID & uuid, bool internal, BackupStatus status)
{
std::lock_guard lock{infos_mutex};
auto & info = infos.at(uuid);
auto it = infos.find({uuid, internal});
if (it == infos.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown pair of UUID={} and internal={}", uuid, internal);
auto & info = it->second;
auto old_status = info.status;
info.status = status;
info.status_changed_time = time(nullptr);
if (status == BackupStatus::BACKUP_COMPLETE)
{
LOG_INFO(log, "{} {} was created successfully", (info.internal ? "Internal backup" : "Backup"), info.backup_name);
}
else if (status == BackupStatus::RESTORED)
{
LOG_INFO(log, "Restored from {} {} successfully", (info.internal ? "internal backup" : "backup"), info.backup_name);
}
else if ((status == BackupStatus::FAILED_TO_BACKUP) || (status == BackupStatus::FAILED_TO_RESTORE))
{
String start_of_message;
if (status == BackupStatus::FAILED_TO_BACKUP)
start_of_message = fmt::format("Failed to create {} {}", (info.internal ? "internal backup" : "backup"), info.backup_name);
else
start_of_message = fmt::format("Failed to restore from {} {}", (info.internal ? "internal backup" : "backup"), info.backup_name);
tryLogCurrentException(log, start_of_message);
info.error_message = getCurrentExceptionMessage(false);
info.exception = std::current_exception();
}
num_active_backups += getNumActiveBackupsChange(status) - getNumActiveBackupsChange(old_status);
num_active_restores += getNumActiveRestoresChange(status) - getNumActiveRestoresChange(old_status);
}
void BackupsWorker::wait(const UUID & backup_or_restore_uuid, bool rethrow_exception)
void BackupsWorker::wait(const UUID & backup_or_restore_uuid, bool internal, bool rethrow_exception)
{
std::unique_lock lock{infos_mutex};
status_changed.wait(lock, [&]
{
auto it = infos.find(backup_or_restore_uuid);
auto it = infos.find({backup_or_restore_uuid, internal});
if (it == infos.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "BackupsWorker: Unknown UUID {}", toString(backup_or_restore_uuid));
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown pair of UUID={} and internal={}", backup_or_restore_uuid, internal);
const auto & info = it->second;
auto current_status = info.status;
if (rethrow_exception && ((current_status == BackupStatus::FAILED_TO_BACKUP) || (current_status == BackupStatus::FAILED_TO_RESTORE)))
@ -437,12 +571,12 @@ void BackupsWorker::wait(const UUID & backup_or_restore_uuid, bool rethrow_excep
});
}
BackupsWorker::Info BackupsWorker::getInfo(const UUID & backup_or_restore_uuid) const
BackupsWorker::Info BackupsWorker::getInfo(const UUID & backup_or_restore_uuid, bool internal) const
{
std::lock_guard lock{infos_mutex};
auto it = infos.find(backup_or_restore_uuid);
auto it = infos.find({backup_or_restore_uuid, internal});
if (it == infos.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "BackupsWorker: Unknown UUID {}", toString(backup_or_restore_uuid));
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown pair of UUID={} and internal={}", backup_or_restore_uuid, internal);
return it->second;
}
@ -457,14 +591,15 @@ std::vector<BackupsWorker::Info> BackupsWorker::getAllInfos() const
void BackupsWorker::shutdown()
{
size_t num_active_backups = backups_thread_pool.active();
size_t num_active_restores = restores_thread_pool.active();
if (!num_active_backups && !num_active_restores)
return;
LOG_INFO(log, "Waiting for {} backup and {} restore tasks to be finished", num_active_backups, num_active_restores);
bool has_active_backups_or_restores = (num_active_backups || num_active_restores);
if (has_active_backups_or_restores)
LOG_INFO(log, "Waiting for {} backups and {} restores to be finished", num_active_backups, num_active_restores);
backups_thread_pool.wait();
restores_thread_pool.wait();
LOG_INFO(log, "All backup and restore tasks have finished");
if (has_active_backups_or_restores)
LOG_INFO(log, "All backup and restore tasks have finished");
}
}
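Keying `infos` by (UUID, internal) lets the initiator's entry and the internal ON CLUSTER entry coexist under one UUID, as the try_emplace check above enforces; a reduced sketch of the lookup pattern (Info simplified to a string, UUID to a string):
#include <iostream>
#include <map>
#include <string>
#include <utility>
int main()
{
    std::map<std::pair<std::string /*uuid*/, bool /*internal*/>, std::string /*Info*/> infos;
    bool inserted = infos.try_emplace({"backup-1234", false}, "initiator entry").second;
    infos.try_emplace({"backup-1234", true}, "internal entry"); /// same UUID, different key
    std::cout << inserted << ' ' << infos.size() << '\n';       /// prints "1 2"
}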

View File

@ -11,6 +11,13 @@ namespace Poco::Util { class AbstractConfiguration; }
namespace DB
{
class ASTBackupQuery;
struct BackupSettings;
struct RestoreSettings;
struct BackupInfo;
class IBackupCoordination;
class IRestoreCoordination;
/// Manager of backups and restores: executes backups and restores' threads in the background.
/// Keeps information about backups and restores started in this session.
class BackupsWorker
@ -22,11 +29,11 @@ public:
void shutdown();
/// Starts executing a BACKUP or RESTORE query. Returns the UUID of the operation and whether it's internal.
UUID start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context);
std::pair<UUID, bool> start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context);
/// Waits until a BACKUP or RESTORE query started by start() is finished.
/// The function returns immediately if the operation is already finished.
void wait(const UUID & backup_or_restore_uuid, bool rethrow_exception = true);
void wait(const UUID & backup_or_restore_uuid, bool internal, bool rethrow_exception = true);
/// Information about executing a BACKUP or RESTORE query started by calling start().
struct Info
@ -47,21 +54,32 @@ public:
bool internal = false;
};
Info getInfo(const UUID & backup_or_restore_uuid) const;
Info getInfo(const UUID & backup_or_restore_uuid, bool internal) const;
std::vector<Info> getAllInfos() const;
private:
UUID startMakingBackup(const ASTPtr & query, const ContextPtr & context);
UUID startRestoring(const ASTPtr & query, ContextMutablePtr context);
std::pair<UUID, bool> startMakingBackup(const ASTPtr & query, const ContextPtr & context);
void addInfo(const UUID & uuid, const String & backup_name, BackupStatus status, bool internal);
void setStatus(const UUID & uuid, BackupStatus status);
void doBackup(const UUID & backup_uuid, const std::shared_ptr<ASTBackupQuery> & backup_query, BackupSettings backup_settings,
const BackupInfo & backup_info, std::shared_ptr<IBackupCoordination> backup_coordination, const ContextPtr & context,
ContextMutablePtr mutable_context, bool called_async);
std::pair<UUID, bool> startRestoring(const ASTPtr & query, ContextMutablePtr context);
void doRestore(const UUID & restore_uuid, const std::shared_ptr<ASTBackupQuery> & restore_query, RestoreSettings restore_settings,
const BackupInfo & backup_info, std::shared_ptr<IRestoreCoordination> restore_coordination, ContextMutablePtr context,
bool called_async);
void addInfo(const UUID & uuid, bool internal, const String & backup_name, BackupStatus status);
void setStatus(const UUID & uuid, bool internal, BackupStatus status);
ThreadPool backups_thread_pool;
ThreadPool restores_thread_pool;
std::unordered_map<UUID, Info> infos;
std::map<std::pair<UUID, bool>, Info> infos;
std::condition_variable status_changed;
std::atomic<size_t> num_active_backups = 0;
std::atomic<size_t> num_active_restores = 0;
mutable std::mutex infos_mutex;
Poco::Logger * log;
};

View File

@ -18,11 +18,11 @@ class IBackupCoordination
public:
virtual ~IBackupCoordination() = default;
/// Sets the current status and waits for other hosts to come to this status too.
virtual void setStatus(const String & current_host, const String & new_status, const String & message) = 0;
virtual void setErrorStatus(const String & current_host, const Exception & exception) = 0;
virtual Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) = 0;
virtual Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) = 0;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void setStage(const String & current_host, const String & new_stage, const String & message) = 0;
virtual void setError(const String & current_host, const Exception & exception) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
struct PartNameAndChecksum
{
@ -115,9 +115,6 @@ public:
/// Returns the list of all the archive suffixes which were generated.
virtual Strings getAllArchiveSuffixes() const = 0;
/// Removes remotely stored information.
virtual void drop() {}
};
}

View File

@ -16,11 +16,11 @@ class IRestoreCoordination
public:
virtual ~IRestoreCoordination() = default;
/// Sets the current status and waits for other hosts to come to this status too.
virtual void setStatus(const String & current_host, const String & new_status, const String & message) = 0;
virtual void setErrorStatus(const String & current_host, const Exception & exception) = 0;
virtual Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) = 0;
virtual Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) = 0;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void setStage(const String & current_host, const String & new_stage, const String & message) = 0;
virtual void setError(const String & current_host, const Exception & exception) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
static constexpr const char * kErrorStatus = "error";
@ -34,9 +34,6 @@ public:
/// Sets that this replica is going to restore a ReplicatedAccessStorage.
/// The function returns false if this access storage is already being restored by another replica.
virtual bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) = 0;
/// Removes remotely stored information.
virtual void drop() {}
};
}

View File

@ -7,20 +7,20 @@ namespace DB
RestoreCoordinationLocal::RestoreCoordinationLocal() = default;
RestoreCoordinationLocal::~RestoreCoordinationLocal() = default;
void RestoreCoordinationLocal::setStatus(const String &, const String &, const String &)
void RestoreCoordinationLocal::setStage(const String &, const String &, const String &)
{
}
void RestoreCoordinationLocal::setErrorStatus(const String &, const Exception &)
void RestoreCoordinationLocal::setError(const String &, const Exception &)
{
}
Strings RestoreCoordinationLocal::waitStatus(const Strings &, const String &)
Strings RestoreCoordinationLocal::waitForStage(const Strings &, const String &)
{
return {};
}
Strings RestoreCoordinationLocal::waitStatusFor(const Strings &, const String &, UInt64)
Strings RestoreCoordinationLocal::waitForStage(const Strings &, const String &, std::chrono::milliseconds)
{
return {};
}

View File

@ -18,11 +18,11 @@ public:
RestoreCoordinationLocal();
~RestoreCoordinationLocal() override;
/// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts.
void setStatus(const String & current_host, const String & new_status, const String & message) override;
void setErrorStatus(const String & current_host, const Exception & exception) override;
Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) override;
Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;

View File

@ -6,57 +6,86 @@
namespace DB
{
RestoreCoordinationRemote::RestoreCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
RestoreCoordinationRemote::RestoreCoordinationRemote(
const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, status_sync(zookeeper_path_ + "/status", get_zookeeper_, &Poco::Logger::get("RestoreCoordination"))
, remove_zk_nodes_in_destructor(remove_zk_nodes_in_destructor_)
{
createRootNodes();
stage_sync.emplace(
zookeeper_path_ + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("RestoreCoordination"));
}
RestoreCoordinationRemote::~RestoreCoordinationRemote() = default;
RestoreCoordinationRemote::~RestoreCoordinationRemote()
{
try
{
if (remove_zk_nodes_in_destructor)
removeAllNodes();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
zkutil::ZooKeeperPtr RestoreCoordinationRemote::getZooKeeper() const
{
std::lock_guard lock{mutex};
if (!zookeeper || zookeeper->expired())
{
zookeeper = get_zookeeper();
/// It's possible that we connected to a different [Zoo]Keeper instance,
/// so we may read slightly stale state.
zookeeper->sync(zookeeper_path);
}
return zookeeper;
}
void RestoreCoordinationRemote::createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_databases_tables_acquired", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_data_acquired", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", "");
auto zk = getZooKeeper();
zk->createAncestors(zookeeper_path);
zk->createIfNotExists(zookeeper_path, "");
zk->createIfNotExists(zookeeper_path + "/repl_databases_tables_acquired", "");
zk->createIfNotExists(zookeeper_path + "/repl_tables_data_acquired", "");
zk->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", "");
}
void RestoreCoordinationRemote::setStatus(const String & current_host, const String & new_status, const String & message)
void RestoreCoordinationRemote::setStage(const String & current_host, const String & new_stage, const String & message)
{
status_sync.set(current_host, new_status, message);
stage_sync->set(current_host, new_stage, message);
}
void RestoreCoordinationRemote::setErrorStatus(const String & current_host, const Exception & exception)
void RestoreCoordinationRemote::setError(const String & current_host, const Exception & exception)
{
status_sync.setError(current_host, exception);
stage_sync->setError(current_host, exception);
}
Strings RestoreCoordinationRemote::waitStatus(const Strings & all_hosts, const String & status_to_wait)
Strings RestoreCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait)
{
return status_sync.wait(all_hosts, status_to_wait);
return stage_sync->wait(all_hosts, stage_to_wait);
}
Strings RestoreCoordinationRemote::waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms)
Strings RestoreCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return status_sync.waitFor(all_hosts, status_to_wait, timeout_ms);
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
}
bool RestoreCoordinationRemote::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_databases_tables_acquired/" + escapeForFileName(database_zk_path);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_name);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
@ -65,10 +94,10 @@ bool RestoreCoordinationRemote::acquireCreatingTableInReplicatedDatabase(const S
bool RestoreCoordinationRemote::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_tables_data_acquired/" + escapeForFileName(table_zk_path);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
@ -77,10 +106,10 @@ bool RestoreCoordinationRemote::acquireInsertingDataIntoReplicatedTable(const St
bool RestoreCoordinationRemote::acquireReplicatedAccessStorage(const String & access_storage_zk_path)
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_access_storages_acquired/" + escapeForFileName(access_storage_zk_path);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
@ -89,13 +118,15 @@ bool RestoreCoordinationRemote::acquireReplicatedAccessStorage(const String & ac
void RestoreCoordinationRemote::removeAllNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->removeRecursive(zookeeper_path);
}
/// Usually this function is called by the initiator when a restore operation is complete, so we don't need the coordination anymore.
///
/// However, there can be a rare situation when this function is called after an error occurs on the initiator of a query
/// while some hosts are still restoring something. Removing all the nodes will remove the parent node of the restore coordination
/// at `zookeeper_path`, which might cause such hosts to stop with the exception "ZNONODE". Or such hosts might still do some part
/// of their restore work before that.
void RestoreCoordinationRemote::drop()
{
removeAllNodes();
auto zk = getZooKeeper();
zk->removeRecursive(zookeeper_path);
}
}
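All three acquire* methods above share one Keeper idiom: try to create a persistent node, read ZOK as "this replica owns the work" and ZNODEEXISTS as "another replica already does". A stripped-down sketch of just that idiom (the helper name is illustrative):
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>

/// Returns true if this replica acquired `path`, false if another replica holds it.
bool tryAcquireViaCreate(const zkutil::ZooKeeperPtr & zk, const std::string & path)
{
    auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
    if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
        throw zkutil::KeeperException(code, path);
    return code == Coordination::Error::ZOK;
}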

View File

@ -1,7 +1,7 @@
#pragma once
#include <Backups/IRestoreCoordination.h>
#include <Backups/BackupCoordinationStatusSync.h>
#include <Backups/BackupCoordinationStageSync.h>
namespace DB
@ -11,14 +11,14 @@ namespace DB
class RestoreCoordinationRemote : public IRestoreCoordination
{
public:
RestoreCoordinationRemote(const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper);
RestoreCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_);
~RestoreCoordinationRemote() override;
/// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts.
void setStatus(const String & current_host, const String & new_status, const String & message) override;
void setErrorStatus(const String & current_host, const Exception & exception) override;
Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) override;
Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
@ -31,10 +31,8 @@ public:
/// The function returns false if this access storage is already being restored by another replica.
bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) override;
/// Removes remotely stored information.
void drop() override;
private:
zkutil::ZooKeeperPtr getZooKeeper() const;
void createRootNodes();
void removeAllNodes();
@ -42,7 +40,12 @@ private:
const String zookeeper_path;
const zkutil::GetZooKeeper get_zookeeper;
BackupCoordinationStatusSync status_sync;
const bool remove_zk_nodes_in_destructor;
std::optional<BackupCoordinationStageSync> stage_sync;
mutable std::mutex mutex;
mutable zkutil::ZooKeeperPtr zookeeper;
};
}

View File

@ -1,5 +1,6 @@
#include <Backups/RestorerFromBackup.h>
#include <Backups/IRestoreCoordination.h>
#include <Backups/BackupCoordinationStage.h>
#include <Backups/BackupSettings.h>
#include <Backups/IBackup.h>
#include <Backups/IBackupEntry.h>
@ -38,20 +39,10 @@ namespace ErrorCodes
}
namespace Stage = BackupCoordinationStage;
namespace
{
/// Finding databases and tables in the backup which we're going to restore.
constexpr const char * kFindingTablesInBackupStatus = "finding tables in backup";
/// Creating databases or finding them and checking their definitions.
constexpr const char * kCreatingDatabasesStatus = "creating databases";
/// Creating tables or finding them and checking their definition.
constexpr const char * kCreatingTablesStatus = "creating tables";
/// Inserting restored data to tables.
constexpr const char * kInsertingDataToTablesStatus = "inserting data to tables";
/// Uppercases the first character of a passed string.
String toUpperFirst(const String & str)
{
@ -102,6 +93,7 @@ RestorerFromBackup::RestorerFromBackup(
, restore_coordination(restore_coordination_)
, backup(backup_)
, context(context_)
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
, create_table_timeout(context->getConfigRef().getUInt64("backups.create_table_timeout", 300000))
, log(&Poco::Logger::get("RestorerFromBackup"))
{
@ -112,7 +104,7 @@ RestorerFromBackup::~RestorerFromBackup() = default;
RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode)
{
/// run() can be called only once.
if (!current_status.empty())
if (!current_stage.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Already restoring");
/// Find other hosts working along with us to execute this ON CLUSTER query.
@ -126,7 +118,7 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode)
findRootPathsInBackup();
/// Find all the databases and tables which we will read from the backup.
setStatus(kFindingTablesInBackupStatus);
setStage(Stage::FINDING_TABLES_IN_BACKUP);
findDatabasesAndTablesInBackup();
/// Check access rights.
@ -136,27 +128,31 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode)
return {};
/// Create databases using the create queries read from the backup.
setStatus(kCreatingDatabasesStatus);
setStage(Stage::CREATING_DATABASES);
createDatabases();
/// Create tables using the create queries read from the backup.
setStatus(kCreatingTablesStatus);
setStage(Stage::CREATING_TABLES);
createTables();
/// All that's left is to insert data into tables.
/// No more data restoring tasks are allowed after this point.
setStatus(kInsertingDataToTablesStatus);
setStage(Stage::INSERTING_DATA_TO_TABLES);
return getDataRestoreTasks();
}
void RestorerFromBackup::setStatus(const String & new_status, const String & message)
void RestorerFromBackup::setStage(const String & new_stage, const String & message)
{
LOG_TRACE(log, "{}", toUpperFirst(new_status));
current_status = new_status;
LOG_TRACE(log, "{}", toUpperFirst(new_stage));
current_stage = new_stage;
if (restore_coordination)
{
restore_coordination->setStatus(restore_settings.host_id, new_status, message);
restore_coordination->waitStatus(all_hosts, new_status);
restore_coordination->setStage(restore_settings.host_id, new_stage, message);
if (new_stage == Stage::FINDING_TABLES_IN_BACKUP)
restore_coordination->waitForStage(all_hosts, new_stage, on_cluster_first_sync_timeout);
else
restore_coordination->waitForStage(all_hosts, new_stage);
}
}
@ -814,14 +810,14 @@ std::vector<QualifiedTableName> RestorerFromBackup::findTablesWithoutDependencie
void RestorerFromBackup::addDataRestoreTask(DataRestoreTask && new_task)
{
if (current_status == kInsertingDataToTablesStatus)
if (current_stage == Stage::INSERTING_DATA_TO_TABLES)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of data-restoring tasks is not allowed");
data_restore_tasks.push_back(std::move(new_task));
}
void RestorerFromBackup::addDataRestoreTasks(DataRestoreTasks && new_tasks)
{
if (current_status == kInsertingDataToTablesStatus)
if (current_stage == Stage::INSERTING_DATA_TO_TABLES)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of data-restoring tasks is not allowed");
insertAtEnd(data_restore_tasks, std::move(new_tasks));
}

View File

@ -73,6 +73,7 @@ private:
std::shared_ptr<IRestoreCoordination> restore_coordination;
BackupPtr backup;
ContextMutablePtr context;
std::chrono::milliseconds on_cluster_first_sync_timeout;
std::chrono::milliseconds create_table_timeout;
Poco::Logger * log;
@ -100,7 +101,7 @@ private:
DataRestoreTasks getDataRestoreTasks();
void setStatus(const String & new_status, const String & message = "");
void setStage(const String & new_stage, const String & message = "");
struct DatabaseInfo
{
@ -124,7 +125,7 @@ private:
std::vector<QualifiedTableName> findTablesWithoutDependencies() const;
String current_status;
String current_stage;
std::unordered_map<String, DatabaseInfo> database_infos;
std::map<QualifiedTableName, TableInfo> table_infos;
std::vector<DataRestoreTask> data_restore_tasks;

View File

@ -346,6 +346,12 @@ set_source_files_properties(
Columns/ColumnString.cpp
PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}")
if (ENABLE_QPL)
set_source_files_properties(
Compression/CompressionCodecDeflateQpl.cpp
PROPERTIES COMPILE_FLAGS "-mwaitpkg")
endif ()
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2_st)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2)
@ -530,6 +536,10 @@ endif ()
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4)
if (TARGET ch_contrib::qpl)
dbms_target_link_libraries(PUBLIC ch_contrib::qpl)
endif ()
dbms_target_link_libraries(PRIVATE _boost_context)
if (ENABLE_NLP)

View File

@ -152,7 +152,6 @@ static void incrementProfileEventsBlock(Block & dst, const Block & src)
auto & dst_column_host_name = typeid_cast<ColumnString &>(*mutable_columns[name_pos["host_name"]]);
auto & dst_array_current_time = typeid_cast<ColumnUInt32 &>(*mutable_columns[name_pos["current_time"]]).getData();
// auto & dst_array_thread_id = typeid_cast<ColumnUInt64 &>(*mutable_columns[name_pos["thread_id"]]).getData();
auto & dst_array_type = typeid_cast<ColumnInt8 &>(*mutable_columns[name_pos["type"]]).getData();
auto & dst_column_name = typeid_cast<ColumnString &>(*mutable_columns[name_pos["name"]]);
auto & dst_array_value = typeid_cast<ColumnInt64 &>(*mutable_columns[name_pos["value"]]).getData();

View File

@ -329,9 +329,9 @@ void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
case 0:
{
const auto r = fuzz_rand() % 3;
def.frame_type = r == 0 ? WindowFrame::FrameType::Rows
: r == 1 ? WindowFrame::FrameType::Range
: WindowFrame::FrameType::Groups;
def.frame_type = r == 0 ? WindowFrame::FrameType::ROWS
: r == 1 ? WindowFrame::FrameType::RANGE
: WindowFrame::FrameType::GROUPS;
break;
}
case 1:
@ -385,7 +385,7 @@ void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
break;
}
if (def.frame_type == WindowFrame::FrameType::Range
if (def.frame_type == WindowFrame::FrameType::RANGE
&& def.frame_begin_type == WindowFrame::BoundaryType::Unbounded
&& def.frame_begin_preceding
&& def.frame_end_type == WindowFrame::BoundaryType::Current)

View File

@ -132,14 +132,12 @@ namespace
ColumnLowCardinality::ColumnLowCardinality(MutableColumnPtr && column_unique_, MutableColumnPtr && indexes_, bool is_shared)
: dictionary(std::move(column_unique_), is_shared), idx(std::move(indexes_))
{
// idx.check(getDictionary().size());
}
void ColumnLowCardinality::insert(const Field & x)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsert(x));
// idx.check(getDictionary().size());
}
void ColumnLowCardinality::insertDefault()
@ -167,15 +165,12 @@ void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n)
const auto & nested = *low_cardinality_src->getDictionary().getNestedColumn();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(nested, position));
}
// idx.check(getDictionary().size());
}
void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(src, n));
// idx.check(getDictionary().size());
}
void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -205,7 +200,6 @@ void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, si
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(*used_keys, 0, used_keys->size());
idx.insertPositionsRange(*inserted_indexes->index(*sub_idx, 0), 0, length);
}
// idx.check(getDictionary().size());
}
void ColumnLowCardinality::insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length)
@ -213,7 +207,6 @@ void ColumnLowCardinality::insertRangeFromFullColumn(const IColumn & src, size_t
compactIfSharedDictionary();
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(src, start, length);
idx.insertPositionsRange(*inserted_indexes, 0, length);
// idx.check(getDictionary().size());
}
static void checkPositionsAreLimited(const IColumn & positions, UInt64 limit)
@ -254,14 +247,12 @@ void ColumnLowCardinality::insertRangeFromDictionaryEncodedColumn(const IColumn
compactIfSharedDictionary();
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(keys, 0, keys.size());
idx.insertPositionsRange(*inserted_indexes->index(positions, 0), 0, positions.size());
// idx.check(getDictionary().size());
}
void ColumnLowCardinality::insertData(const char * pos, size_t length)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertData(pos, length));
// idx.check(getDictionary().size());
}
StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
@ -276,7 +267,6 @@ const char * ColumnLowCardinality::deserializeAndInsertFromArena(const char * po
const char * new_pos;
idx.insertPosition(dictionary.getColumnUnique().uniqueDeserializeAndInsertFromArena(pos, new_pos));
// idx.check(getDictionary().size());
return new_pos;
}

View File

@ -273,14 +273,6 @@ llvm::Value * ColumnNullable::compileComparator(llvm::IRBuilderBase & builder, l
b.CreateCondBr(lhs_or_rhs_are_null, lhs_or_rhs_are_null_block, lhs_rhs_are_not_null_block);
// if (unlikely(lval_is_null || rval_is_null))
// {
// if (lval_is_null && rval_is_null)
// return 0;
// else
// return lval_is_null ? null_direction_hint : -null_direction_hint;
// }
b.SetInsertPoint(lhs_or_rhs_are_null_block);
auto * lhs_equals_rhs_result = llvm::ConstantInt::getSigned(b.getInt8Ty(), 0);
llvm::Value * lhs_and_rhs_are_null = b.CreateAnd(lhs_is_null_value, rhs_is_null_value);
@ -288,8 +280,6 @@ llvm::Value * ColumnNullable::compileComparator(llvm::IRBuilderBase & builder, l
llvm::Value * lhs_or_rhs_are_null_block_result = b.CreateSelect(lhs_and_rhs_are_null, lhs_equals_rhs_result, lhs_is_null_result);
b.CreateBr(join_block);
// getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
b.SetInsertPoint(lhs_rhs_are_not_null_block);
llvm::Value * lhs_rhs_are_not_null_block_result
= nested_column->compileComparator(builder, lhs_unwrapped_value, rhs_unwrapped_value, nan_direction_hint);

View File

@ -548,7 +548,6 @@ MutableColumnPtr ColumnUnique<ColumnType>::uniqueInsertRangeImpl(
}
}
// checkIndexes(*positions_column, column->size() + (overflowed_keys ? overflowed_keys->size() : 0));
return std::move(positions_column);
}

View File

@ -514,8 +514,6 @@ private:
return allocateFromFreeRegion(*free_region, size);
}
// std::cerr << "Requested size: " << size << "\n";
/// Evict something from cache and continue.
while (true)
{

View File

@ -73,7 +73,7 @@ bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const S
/// Check that the right configuration has the same set of subkeys as the left configuration.
Poco::Util::AbstractConfiguration::Keys right_subkeys;
right.keys(right_key, right_subkeys);
std::unordered_set<StringRef> left_subkeys{subkeys.begin(), subkeys.end()};
std::unordered_set<std::string_view> left_subkeys{subkeys.begin(), subkeys.end()};
if ((left_subkeys.size() != right_subkeys.size()) || (left_subkeys.size() != subkeys.size()))
return false;
for (const auto & right_subkey : right_subkeys)

View File

@ -122,9 +122,6 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
values.time_at_offset_change_value = (transition.from - cctz::civil_second(date)) / Values::OffsetChangeFactor;
values.amount_of_offset_change_value = (transition.to - transition.from) / Values::OffsetChangeFactor;
// std::cerr << time_zone << ", " << date << ": change from " << transition.from << " to " << transition.to << "\n";
// std::cerr << time_zone << ", " << date << ": change at " << values.time_at_offset_change() << " with " << values.amount_of_offset_change() << "\n";
/// We don't support too large changes.
if (values.amount_of_offset_change_value > 24 * 4)
values.amount_of_offset_change_value = 24 * 4;

View File

@ -74,7 +74,6 @@ public:
using key_type = Key;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type;
using cell_type = Cell;
class Reader final : private Cell::State
{
@ -247,39 +246,6 @@ public:
}
}
/// Same, but return false if it's full.
bool ALWAYS_INLINE tryEmplace(Key x, iterator & it, bool & inserted)
{
Cell * res = findCell(x);
it = iteratorTo(res);
inserted = res == buf + m_size;
if (inserted)
{
if (res == buf + capacity)
return false;
new(res) Cell(x, *this);
++m_size;
}
return true;
}
/// Copy the cell from another hash table. It is assumed that there was no such key in the table yet.
void ALWAYS_INLINE insertUnique(const Cell * cell)
{
memcpy(&buf[m_size], cell, sizeof(*cell));
++m_size;
}
void ALWAYS_INLINE insertUnique(Key x)
{
new(&buf[m_size]) Cell(x, *this);
++m_size;
}
iterator ALWAYS_INLINE find(Key x) { return iteratorTo(findCell(x)); }
const_iterator ALWAYS_INLINE find(Key x) const { return iteratorTo(findCell(x)); }
@ -381,36 +347,3 @@ template
>
using SmallSet = SmallTable<Key, HashTableCell<Key, HashUnused>, capacity>;
template
<
typename Key,
typename Cell,
size_t capacity
>
class SmallMapTable : public SmallTable<Key, Cell, capacity>
{
public:
using key_type = Key;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type;
using cell_type = Cell;
mapped_type & ALWAYS_INLINE operator[](Key x)
{
typename SmallMapTable::iterator it;
bool inserted;
this->emplace(x, it, inserted);
new (&it->getMapped()) mapped_type();
return it->getMapped();
}
};
template
<
typename Key,
typename Mapped,
size_t capacity
>
using SmallMap = SmallMapTable<Key, HashMapCell<Key, Mapped, HashUnused>, capacity>;

View File

@ -355,8 +355,6 @@ private:
template <size_t PASS>
static inline void radixSortMSDInternal(Element * arr, size_t size, size_t limit)
{
// std::cerr << PASS << ", " << size << ", " << limit << "\n";
/// The beginning of every i-1-th bucket. 0th element will be equal to 1st.
/// Last element will point to array end.
std::unique_ptr<Element *[]> prev_buckets{new Element*[HISTOGRAM_SIZE + 1]};

View File

@ -15,20 +15,31 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
constexpr size_t StringHashTablePadRequirement = 8;
/// TLDList
TLDList::TLDList(size_t size)
: tld_container(size)
, pool(std::make_unique<Arena>(10 << 20))
{}
bool TLDList::insert(StringRef host)
, memory_pool(std::make_unique<Arena>())
{
bool inserted;
tld_container.emplace(DB::ArenaKeyHolder{host, *pool}, inserted);
return inserted;
/// StringHashTable requires the key to be padded to 8 bytes,
/// and Arena (memory_pool here) satisfies this,
/// since it has 15 bytes of padding on the right.
///
/// However, StringHashTable may reference the byte at offset -1 of the key,
/// so left padding is also required:
memory_pool->alignedAlloc(StringHashTablePadRequirement, StringHashTablePadRequirement);
}
bool TLDList::has(StringRef host) const
void TLDList::insert(const String & host, TLDType type)
{
return tld_container.has(host);
StringRef owned_host{memory_pool->insert(host.data(), host.size()), host.size()};
tld_container[owned_host] = type;
}
TLDType TLDList::lookup(StringRef host) const
{
if (auto it = tld_container.find(host); it != nullptr)
return it->getMapped();
return TLDType::TLD_NONE;
}
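A short usage sketch of the new typed container; the hosts and assertions are illustrative, with TLD_ANY and TLD_EXCLUDE corresponding to "*." and "!" lines as parsed below:
#include <cassert>
#include <Common/TLDListsHolder.h>

void tldListExample()
{
    DB::TLDList list(/* size */ 3);
    list.insert("com", DB::TLDType::TLD_REGULAR);    /// plain line "com"
    list.insert("ck", DB::TLDType::TLD_ANY);         /// wildcard line "*.ck"
    list.insert("www.ck", DB::TLDType::TLD_EXCLUDE); /// exception line "!www.ck"

    assert(list.lookup("com") == DB::TLDType::TLD_REGULAR);
    assert(list.lookup("www.ck") == DB::TLDType::TLD_EXCLUDE);
    assert(list.lookup("org") == DB::TLDType::TLD_NONE); /// absent entries map to TLD_NONE
}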
/// TLDListsHolder
@ -57,32 +68,44 @@ void TLDListsHolder::parseConfig(const std::string & top_level_domains_path, con
size_t TLDListsHolder::parseAndAddTldList(const std::string & name, const std::string & path)
{
std::unordered_set<std::string> tld_list_tmp;
std::unordered_map<std::string, TLDType> tld_list_tmp;
ReadBufferFromFile in(path);
String line;
String buffer;
while (!in.eof())
{
readEscapedStringUntilEOL(line, in);
readEscapedStringUntilEOL(buffer, in);
if (!in.eof())
++in.position();
std::string_view line(buffer);
/// Skip comments
if (line.size() > 2 && line[0] == '/' && line[1] == '/')
if (line.starts_with("//"))
continue;
line = trim(line, [](char c) { return std::isspace(c); });
line = line.substr(0, line.rend() - std::find_if_not(line.rbegin(), line.rend(), ::isspace));
/// Skip empty line
if (line.empty())
continue;
tld_list_tmp.emplace(line);
/// Handle the special prefixes.
if (line.starts_with("*."))
{
line = line.substr(2);
tld_list_tmp.emplace(line, TLDType::TLD_ANY);
}
else if (line[0] == '!')
{
line = line.substr(1);
tld_list_tmp.emplace(line, TLDType::TLD_EXCLUDE);
}
else
tld_list_tmp.emplace(line, TLDType::TLD_REGULAR);
}
if (!in.eof())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not all list had been read", name);
TLDList tld_list(tld_list_tmp.size());
for (const auto & host : tld_list_tmp)
for (const auto & [host, type] : tld_list_tmp)
{
StringRef host_ref{host.data(), host.size()};
tld_list.insert(host_ref);
tld_list.insert(host, type);
}
size_t tld_list_size = tld_list.size();

View File

@ -2,7 +2,7 @@
#include <base/defines.h>
#include <base/StringRef.h>
#include <Common/HashTable/StringHashSet.h>
#include <Common/HashTable/StringHashMap.h>
#include <Common/Arena.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <mutex>
@ -12,25 +12,35 @@
namespace DB
{
enum TLDType
{
/// Marker that the entry does not exist
TLD_NONE,
/// For regular lines
TLD_REGULAR,
/// For asterisk (*)
TLD_ANY,
/// For exclamation mark (!)
TLD_EXCLUDE,
};
/// Custom TLD List
///
/// Unlike tldLookup (which uses gperf) this one uses plain StringHashSet.
/// Unlike tldLookup (which uses gperf) this one uses plain StringHashMap.
class TLDList
{
public:
using Container = StringHashSet<>;
using Container = StringHashMap<TLDType>;
explicit TLDList(size_t size);
/// Return true if the tld_container does not contain such an element.
bool insert(StringRef host);
/// Check whether there is such a TLD
bool has(StringRef host) const;
void insert(const String & host, TLDType type);
TLDType lookup(StringRef host) const;
size_t size() const { return tld_container.size(); }
private:
Container tld_container;
std::unique_ptr<Arena> pool;
std::unique_ptr<Arena> memory_pool;
};
class TLDListsHolder
@ -48,6 +58,11 @@ public:
/// - "//" -- comment,
/// - empty lines will be ignored.
///
/// Handles the following special symbols:
/// - "*"
/// - "!"
///
/// Format : https://github.com/publicsuffix/list/wiki/Format
/// Example: https://publicsuffix.org/list/public_suffix_list.dat
///
/// Return size of the list.

View File

@ -42,13 +42,14 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Int
char buffer[buf_size];
WriteBufferFromFileDescriptorDiscardOnFailure out(pipe.fds_rw[1], buf_size, buffer);
StringRef query_id;
std::string_view query_id;
UInt64 thread_id;
if (CurrentThread::isInitialized())
{
query_id = StringRef(CurrentThread::getQueryId());
query_id.size = std::min(query_id.size, QUERY_ID_MAX_LEN);
query_id = CurrentThread::getQueryId();
if (query_id.size() > QUERY_ID_MAX_LEN)
query_id.remove_suffix(query_id.size() - QUERY_ID_MAX_LEN);
thread_id = CurrentThread::get().thread_id;
}
@ -59,8 +60,8 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Int
writeChar(false, out); /// true if requested to stop the collecting thread.
writeBinary(static_cast<uint8_t>(query_id.size), out);
out.write(query_id.data, query_id.size);
writeBinary(static_cast<uint8_t>(query_id.size()), out);
out.write(query_id.data(), query_id.size());
size_t stack_trace_size = stack_trace.getSize();
size_t stack_trace_offset = stack_trace.getOffset();
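The std::string_view change above replaces manual StringRef size clamping with remove_suffix; the idiom in isolation (the limit value is illustrative):
#include <string_view>

std::string_view truncateView(std::string_view id, size_t max_len)
{
    /// remove_suffix only narrows the view; no copy of the underlying bytes is made.
    if (id.size() > max_len)
        id.remove_suffix(id.size() - max_len);
    return id;
}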

View File

@ -9,7 +9,6 @@
#include <Common/StringSearcher.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/UTF8Helpers.h>
#include <base/StringRef.h>
#include <base/unaligned.h>
/** Search for a substring in a string by Volnitsky's algorithm

View File

@ -99,6 +99,7 @@ try
std::cout << "list\n";
zk.list("/",
Coordination::ListRequestType::ALL,
[&](const ListResponse & response)
{
if (response.error != Coordination::Error::ZOK)

src/Common/base58.h (new file, 87 lines)
View File

@ -0,0 +1,87 @@
#pragma once
#include <climits>
#include <cstring>
namespace DB
{
inline size_t encodeBase58(const char8_t * src, char8_t * dst)
{
const char * base58_encoding_alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
size_t idx = 0;
for (; *src; ++src)
{
unsigned int carry = static_cast<unsigned char>(*src);
for (size_t j = 0; j < idx; ++j)
{
carry += static_cast<unsigned int>(dst[j] << 8);
dst[j] = static_cast<unsigned char>(carry % 58);
carry /= 58;
}
while (carry > 0)
{
dst[idx++] = static_cast<unsigned char>(carry % 58);
carry /= 58;
}
}
size_t c_idx = idx >> 1;
for (size_t i = 0; i < c_idx; ++i)
{
char s = base58_encoding_alphabet[static_cast<unsigned char>(dst[i])];
dst[i] = base58_encoding_alphabet[static_cast<unsigned char>(dst[idx - (i + 1)])];
dst[idx - (i + 1)] = s;
}
if ((idx & 1))
{
dst[c_idx] = base58_encoding_alphabet[static_cast<unsigned char>(dst[c_idx])];
}
dst[idx] = '\0';
return idx + 1;
}
inline size_t decodeBase58(const char8_t * src, char8_t * dst)
{
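/// Note: UINT_MAX truncated into a signed char is -1; the implicit widening in
/// 'carry = map_digits[*src]' below turns it back into UINT_MAX, so -1 doubles
/// as the "invalid character" sentinel of this table.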
const signed char uint_max = UINT_MAX;
const signed char map_digits[128]
    = {uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max,
       uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max,
       uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max,
       uint_max, 0, 1, 2, 3, 4, 5, 6, 7, 8, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max,
       uint_max, 9, 10, 11, 12, 13, 14, 15, 16, uint_max, 17, 18, 19, 20, 21, uint_max,
       22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, uint_max, uint_max, uint_max, uint_max, uint_max,
       uint_max, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, uint_max, 44, 45, 46,
       47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, uint_max, uint_max, uint_max, uint_max, uint_max};
size_t idx = 0;
for (; *src; ++src)
{
unsigned int carry = map_digits[*src];
if (unlikely(carry == UINT_MAX))
{
return 0;
}
for (size_t j = 0; j < idx; ++j)
{
carry += static_cast<unsigned char>(dst[j]) * 58;
dst[j] = static_cast<unsigned char>(carry & 0xff);
carry >>= 8;
}
while (carry > 0)
{
dst[idx++] = static_cast<unsigned char>(carry & 0xff);
carry >>= 8;
}
}
size_t c_idx = idx >> 1;
for (size_t i = 0; i < c_idx; ++i)
{
char s = dst[i];
dst[i] = dst[idx - (i + 1)];
dst[idx - (i + 1)] = s;
}
dst[idx] = '\0';
return idx + 1;
}
}
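A minimal round-trip sketch for the two helpers above. Buffer sizing assumes base58 expands input by well under 2x, and "Cn8eVZg" follows from the standard base58 alphabet applied to "hello":
#include <cassert>
#include <cstring>
#include <Common/base58.h>

int main()
{
    const char8_t src[] = u8"hello";
    char8_t encoded[2 * sizeof(src)] = {}; /// base58 grows data by ~1.37x; 2x is a safe bound
    char8_t decoded[sizeof(src)] = {};

    DB::encodeBase58(src, encoded); /// null-terminated in, null-terminated out
    assert(std::strcmp(reinterpret_cast<const char *>(encoded), "Cn8eVZg") == 0);

    /// decodeBase58 returns 0 when it meets a character outside the alphabet.
    size_t written = DB::decodeBase58(encoded, decoded);
    assert(written != 0);
    assert(std::strcmp(reinterpret_cast<const char *>(decoded), "hello") == 0);
    return 0;
}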

View File

@ -1,5 +1,4 @@
#include <iostream>
#include <iomanip>
#include <Interpreters/AggregationCommon.h>
@ -33,22 +32,6 @@ int main(int, char **)
std::cerr << "dump: " << wb.str() << std::endl;
}
{
using Cont = SmallMap<int, std::string, 16>;
Cont cont;
cont.insert(Cont::value_type(1, "Hello, world!"));
cont[1] = "Goodbye.";
for (auto x : cont)
std::cerr << x.getKey() << " -> " << x.getMapped() << std::endl;
DB::WriteBufferFromOwnString wb;
cont.writeText(wb);
std::cerr << "dump: " << wb.str() << std::endl;
}
{
using Cont = SmallSet<DB::UInt128, 16>;
Cont cont;

View File

@ -14,8 +14,10 @@ namespace
template <typename T, typename... Ts> constexpr auto firstArg(T && x, Ts &&...) { return std::forward<T>(x); }
/// For implicit conversion of fmt::basic_runtime<> to char* for std::string ctor
template <typename T, typename... Ts> constexpr auto firstArg(fmt::basic_runtime<T> && data, Ts &&...) { return data.str.data(); }
}
[[maybe_unused]] const ::Poco::Logger * getLogger(const ::Poco::Logger * logger) { return logger; };
[[maybe_unused]] const ::Poco::Logger * getLogger(const std::atomic<::Poco::Logger *> & logger) { return logger.load(); };
}
/// Logs a message to a specified logger with that level.
/// If more than one argument is provided,
@ -25,20 +27,21 @@ namespace
#define LOG_IMPL(logger, priority, PRIORITY, ...) do \
{ \
const bool is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \
auto _logger = ::getLogger(logger); \
const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \
(DB::CurrentThread::getGroup()->client_logs_level >= (priority)); \
if ((logger)->is((PRIORITY)) || is_clients_log) \
if (_logger->is((PRIORITY)) || _is_clients_log) \
{ \
std::string formatted_message = numArgs(__VA_ARGS__) > 1 ? fmt::format(__VA_ARGS__) : firstArg(__VA_ARGS__); \
if (auto channel = (logger)->getChannel()) \
if (auto _channel = _logger->getChannel()) \
{ \
std::string file_function; \
file_function += __FILE__; \
file_function += "; "; \
file_function += __PRETTY_FUNCTION__; \
Poco::Message poco_message((logger)->name(), formatted_message, \
Poco::Message poco_message(_logger->name(), formatted_message, \
(PRIORITY), file_function.c_str(), __LINE__); \
channel->log(poco_message); \
_channel->log(poco_message); \
} \
} \
} while (false)
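The effect of the two getLogger overloads above is that LOG_IMPL now accepts either a plain Poco::Logger pointer or an atomic one; a minimal illustration (assumes this header is included):
#include <atomic>
#include <Poco/Logger.h>

void loggerOverloadsExample()
{
    Poco::Logger * plain = &Poco::Logger::get("Example");
    std::atomic<Poco::Logger *> atomic_logger{plain};

    /// Both forms resolve to the same plain pointer inside LOG_IMPL.
    const Poco::Logger * a = getLogger(plain);
    const Poco::Logger * b = getLogger(atomic_logger);
    (void)a; (void)b;
}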

Some files were not shown because too many files have changed in this diff.