Merge branch 'master' into keeper-version-check

Antonio Andelic 2022-07-26 07:47:57 +00:00
commit 35b61cc94e
182 changed files with 2837 additions and 968 deletions


@ -394,7 +394,7 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=ClickHouse build check (actions)
CHECK_NAME=ClickHouse build check
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
@ -437,7 +437,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -477,7 +477,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, actions)
CHECK_NAME=Stateful tests (debug)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -521,7 +521,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (thread, actions)
CHECK_NAME=Stress test (thread)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -560,7 +560,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
EOF
- name: Download json reports


@ -102,6 +102,9 @@ jobs:
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{ runner.temp }}/style_check
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
EOF
- name: Download changed images
# even if artifact does not exist, e.g. on `do not test` label or failed Docker job
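Note on the added lines: `ROBOT_CLICKHOUSE_SSH_KEY<<RCSK` uses the multi-line syntax of the `$GITHUB_ENV` environment file, where everything between the `<<RCSK` marker and the closing `RCSK` line becomes the variable's value for later steps. A minimal sketch of the same mechanism, with a placeholder variable name and payload that are not part of this commit:

```bash
# Append a multi-line value to the step environment file.
# EXAMPLE_MULTILINE and the payload lines are hypothetical placeholders.
{
  echo 'EXAMPLE_MULTILINE<<RCSK'
  printf '%s\n' 'first line of the value' 'second line of the value'
  echo 'RCSK'
} >> "$GITHUB_ENV"
```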


@ -108,7 +108,7 @@ jobs:
- name: Style Check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 style_check.py
python3 style_check.py --no-push
- name: Cleanup
if: always()
run: |
@ -971,7 +971,7 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=ClickHouse build check (actions)
CHECK_NAME=ClickHouse build check
REPORTS_PATH=${{runner.temp}}/reports_dir
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
@ -1020,7 +1020,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse special build check (actions)
CHECK_NAME=ClickHouse special build check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
EOF
- name: Download json reports
@ -1061,7 +1061,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, actions)
CHECK_NAME=Stateless tests (release)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1098,7 +1098,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release_database_ordinary
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, DatabaseOrdinary, actions)
CHECK_NAME=Stateless tests (release, DatabaseOrdinary)
REPO_COPY=${{runner.temp}}/stateless_release_database_ordinary/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1135,7 +1135,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_s3_storage
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, s3 storage, actions)
CHECK_NAME=Stateless tests (release, s3 storage)
REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1172,7 +1172,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (aarch64, actions)
CHECK_NAME=Stateless tests (aarch64)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1209,7 +1209,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1248,7 +1248,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1287,7 +1287,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1326,7 +1326,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1365,7 +1365,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1404,7 +1404,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (ubsan, actions)
CHECK_NAME=Stateless tests (ubsan)
REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1441,7 +1441,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1480,7 +1480,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1519,7 +1519,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1558,7 +1558,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1597,7 +1597,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1636,7 +1636,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1678,7 +1678,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, actions)
CHECK_NAME=Stateful tests (release)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1715,7 +1715,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release_database_ordinary
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, DatabaseOrdinary, actions)
CHECK_NAME=Stateful tests (release, DatabaseOrdinary)
REPO_COPY=${{runner.temp}}/stateful_release_database_ordinary/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1752,7 +1752,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (aarch64, actions)
CHECK_NAME=Stateful tests (aarch64)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1789,7 +1789,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (address, actions)
CHECK_NAME=Stateful tests (address)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1826,7 +1826,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (thread, actions)
CHECK_NAME=Stateful tests (thread)
REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1863,7 +1863,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (memory, actions)
CHECK_NAME=Stateful tests (memory)
REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1900,7 +1900,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (ubsan, actions)
CHECK_NAME=Stateful tests (ubsan)
REPO_COPY=${{runner.temp}}/stateful_ubsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1937,7 +1937,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, actions)
CHECK_NAME=Stateful tests (debug)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1977,7 +1977,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (address, actions)
CHECK_NAME=Stress test (address)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -2017,7 +2017,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (thread, actions)
CHECK_NAME=Stress test (thread)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -2053,7 +2053,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (memory, actions)
CHECK_NAME=Stress test (memory)
REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse
EOF
- name: Download json reports
@ -2089,7 +2089,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_undefined
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (undefined, actions)
CHECK_NAME=Stress test (undefined)
REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse
EOF
- name: Download json reports
@ -2125,7 +2125,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (debug, actions)
CHECK_NAME=Stress test (debug)
REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse
EOF
- name: Download json reports
@ -2164,7 +2164,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
@ -2202,7 +2202,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
@ -2240,7 +2240,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
@ -2278,7 +2278,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=4
@ -2316,7 +2316,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=4
@ -2354,7 +2354,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=4
@ -2392,7 +2392,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=3
RUN_BY_HASH_TOTAL=4
@ -2430,7 +2430,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=2
@ -2468,7 +2468,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=2
@ -2509,7 +2509,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (ASan, actions)
CHECK_NAME=AST fuzzer (ASan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse
EOF
- name: Download json reports
@ -2545,7 +2545,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (TSan, actions)
CHECK_NAME=AST fuzzer (TSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse
EOF
- name: Download json reports
@ -2581,7 +2581,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (UBSan, actions)
CHECK_NAME=AST fuzzer (UBSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse
EOF
- name: Download json reports
@ -2617,7 +2617,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (MSan, actions)
CHECK_NAME=AST fuzzer (MSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse
EOF
- name: Download json reports
@ -2653,7 +2653,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (debug, actions)
CHECK_NAME=AST fuzzer (debug)
REPO_COPY=${{runner.temp}}/ast_fuzzer_debug/ClickHouse
EOF
- name: Download json reports
@ -2692,7 +2692,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (asan, actions)
CHECK_NAME=Unit tests (asan)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@ -2728,7 +2728,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-clang, actions)
CHECK_NAME=Unit tests (release-clang)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@ -2764,7 +2764,7 @@ jobs:
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/unit_tests_asan
# REPORTS_PATH=${{runner.temp}}/reports_dir
# CHECK_NAME=Unit tests (release-gcc, actions)
# CHECK_NAME=Unit tests (release-gcc)
# REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
# EOF
# - name: Download json reports
@ -2800,7 +2800,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (tsan, actions)
CHECK_NAME=Unit tests (tsan)
REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse
EOF
- name: Download json reports
@ -2836,7 +2836,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (msan, actions)
CHECK_NAME=Unit tests (msan)
REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse
EOF
- name: Download json reports
@ -2872,7 +2872,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (ubsan, actions)
CHECK_NAME=Unit tests (ubsan)
REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse
EOF
- name: Download json reports


@ -118,6 +118,9 @@ jobs:
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{ runner.temp }}/style_check
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
EOF
- name: Download changed images
# even if artifact does not exist, e.g. on `do not test` label or failed Docker job
@ -1026,7 +1029,7 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=ClickHouse build check (actions)
CHECK_NAME=ClickHouse build check
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
@ -1075,7 +1078,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse special build check (actions)
CHECK_NAME=ClickHouse special build check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
EOF
- name: Download json reports
@ -1116,7 +1119,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, actions)
CHECK_NAME=Stateless tests (release)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1153,7 +1156,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_database_replicated
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, DatabaseReplicated, actions)
CHECK_NAME=Stateless tests (release, DatabaseReplicated)
REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1192,7 +1195,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_database_replicated
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, DatabaseReplicated, actions)
CHECK_NAME=Stateless tests (release, DatabaseReplicated)
REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1231,7 +1234,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_wide_parts
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, wide parts enabled, actions)
CHECK_NAME=Stateless tests (release, wide parts enabled)
REPO_COPY=${{runner.temp}}/stateless_wide_parts/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1268,7 +1271,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_s3_storage
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, s3 storage, actions)
CHECK_NAME=Stateless tests (release, s3 storage)
REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1305,7 +1308,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (aarch64, actions)
CHECK_NAME=Stateless tests (aarch64)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1342,7 +1345,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1381,7 +1384,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1420,7 +1423,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1459,7 +1462,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1498,7 +1501,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1537,7 +1540,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (ubsan, actions)
CHECK_NAME=Stateless tests (ubsan)
REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -1574,7 +1577,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1613,7 +1616,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1652,7 +1655,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1691,7 +1694,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -1730,7 +1733,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1769,7 +1772,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1808,7 +1811,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_flaky_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests flaky check (address, actions)
CHECK_NAME=Stateless tests flaky check (address)
REPO_COPY=${{runner.temp}}/stateless_flaky_asan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1844,7 +1847,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/tests_bugfix_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Tests bugfix validate check (actions)
CHECK_NAME=tests bugfix validate check
KILL_TIMEOUT=3600
REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse
EOF
@ -1866,12 +1869,12 @@ jobs:
TEMP_PATH="${TEMP_PATH}/integration" \
REPORTS_PATH="${REPORTS_PATH}/integration" \
python3 integration_test_check.py "Integration tests bugfix validate check" \
python3 integration_test_check.py "Integration $CHECK_NAME" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'
TEMP_PATH="${TEMP_PATH}/stateless" \
REPORTS_PATH="${REPORTS_PATH}/stateless" \
python3 functional_test_check.py "Stateless tests bugfix validate check" "$KILL_TIMEOUT" \
python3 functional_test_check.py "Stateless $CHECK_NAME" "$KILL_TIMEOUT" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'
python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/post_commit_status.tsv" "${TEMP_PATH}/integration/post_commit_status.tsv"
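The rename to the lowercase `tests bugfix validate check` works together with the two calls above: prefixing `$CHECK_NAME` with `Integration` or `Stateless` reproduces the literal check names that were previously hard-coded. A small shell sketch of the expansion:

```bash
# Illustration of the variable expansion used in the step above.
CHECK_NAME="tests bugfix validate check"

echo "Integration $CHECK_NAME"  # -> Integration tests bugfix validate check
echo "Stateless $CHECK_NAME"    # -> Stateless tests bugfix validate check
```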
@ -1895,7 +1898,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, actions)
CHECK_NAME=Stateful tests (release)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1932,7 +1935,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (aarch64, actions)
CHECK_NAME=Stateful tests (aarch64)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1969,7 +1972,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (address, actions)
CHECK_NAME=Stateful tests (address)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2006,7 +2009,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (thread, actions)
CHECK_NAME=Stateful tests (thread)
REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2043,7 +2046,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (memory, actions)
CHECK_NAME=Stateful tests (memory)
REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2080,7 +2083,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (ubsan, actions)
CHECK_NAME=Stateful tests (ubsan)
REPO_COPY=${{runner.temp}}/stateful_ubsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2117,7 +2120,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, actions)
CHECK_NAME=Stateful tests (debug)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -2157,7 +2160,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (address, actions)
CHECK_NAME=Stress test (address)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -2197,7 +2200,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (thread, actions)
CHECK_NAME=Stress test (thread)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -2233,7 +2236,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (memory, actions)
CHECK_NAME=Stress test (memory)
REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse
EOF
- name: Download json reports
@ -2269,7 +2272,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_undefined
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (undefined, actions)
CHECK_NAME=Stress test (undefined)
REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse
EOF
- name: Download json reports
@ -2305,7 +2308,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (debug, actions)
CHECK_NAME=Stress test (debug)
REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse
EOF
- name: Download json reports
@ -2344,7 +2347,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (ASan, actions)
CHECK_NAME=AST fuzzer (ASan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse
EOF
- name: Download json reports
@ -2380,7 +2383,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (TSan, actions)
CHECK_NAME=AST fuzzer (TSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse
EOF
- name: Download json reports
@ -2416,7 +2419,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (UBSan, actions)
CHECK_NAME=AST fuzzer (UBSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse
EOF
- name: Download json reports
@ -2452,7 +2455,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (MSan, actions)
CHECK_NAME=AST fuzzer (MSan)
REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse
EOF
- name: Download json reports
@ -2488,7 +2491,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/ast_fuzzer_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=AST fuzzer (debug, actions)
CHECK_NAME=AST fuzzer (debug)
REPO_COPY=${{runner.temp}}/ast_fuzzer_debug/ClickHouse
EOF
- name: Download json reports
@ -2527,7 +2530,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
@ -2565,7 +2568,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
@ -2603,7 +2606,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
@ -2641,7 +2644,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=4
@ -2679,7 +2682,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=4
@ -2717,7 +2720,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=4
@ -2755,7 +2758,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=3
RUN_BY_HASH_TOTAL=4
@ -2793,7 +2796,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=2
@ -2831,7 +2834,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=2
@ -2869,7 +2872,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan_flaky_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests flaky check (asan, actions)
CHECK_NAME=Integration tests flaky check (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan_flaky_check/ClickHouse
EOF
- name: Download json reports
@ -2908,7 +2911,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (asan, actions)
CHECK_NAME=Unit tests (asan)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@ -2944,7 +2947,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-clang, actions)
CHECK_NAME=Unit tests (release-clang)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@ -2980,7 +2983,7 @@ jobs:
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/unit_tests_asan
# REPORTS_PATH=${{runner.temp}}/reports_dir
# CHECK_NAME=Unit tests (release-gcc, actions)
# CHECK_NAME=Unit tests (release-gcc)
# REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
# EOF
# - name: Download json reports
@ -3016,7 +3019,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (tsan, actions)
CHECK_NAME=Unit tests (tsan)
REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse
EOF
- name: Download json reports
@ -3052,7 +3055,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (msan, actions)
CHECK_NAME=Unit tests (msan)
REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse
EOF
- name: Download json reports
@ -3088,7 +3091,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (ubsan, actions)
CHECK_NAME=Unit tests (ubsan)
REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse
EOF
- name: Download json reports


@ -473,7 +473,7 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=ClickHouse build check (actions)
CHECK_NAME=ClickHouse build check
REPORTS_PATH=${{runner.temp}}/reports_dir
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
@ -517,7 +517,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, actions)
CHECK_NAME=Stateless tests (release)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -554,7 +554,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (aarch64, actions)
CHECK_NAME=Stateless tests (aarch64)
REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -591,7 +591,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -630,7 +630,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
CHECK_NAME=Stateless tests (address)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -669,7 +669,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -708,7 +708,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -747,7 +747,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (thread, actions)
CHECK_NAME=Stateless tests (thread)
REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -786,7 +786,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (ubsan, actions)
CHECK_NAME=Stateless tests (ubsan)
REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse
KILL_TIMEOUT=10800
EOF
@ -823,7 +823,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -862,7 +862,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -901,7 +901,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (memory, actions)
CHECK_NAME=Stateless tests (memory)
REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -940,7 +940,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
@ -979,7 +979,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
@ -1018,7 +1018,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (debug, actions)
CHECK_NAME=Stateless tests (debug)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=2
@ -1060,7 +1060,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, actions)
CHECK_NAME=Stateful tests (release)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1097,7 +1097,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (aarch64, actions)
CHECK_NAME=Stateful tests (aarch64)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1134,7 +1134,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (address, actions)
CHECK_NAME=Stateful tests (address)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1171,7 +1171,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (thread, actions)
CHECK_NAME=Stateful tests (thread)
REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1208,7 +1208,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (memory, actions)
CHECK_NAME=Stateful tests (memory)
REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1245,7 +1245,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (ubsan, actions)
CHECK_NAME=Stateful tests (ubsan)
REPO_COPY=${{runner.temp}}/stateful_ubsan/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1282,7 +1282,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, actions)
CHECK_NAME=Stateful tests (debug)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
@ -1322,7 +1322,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (address, actions)
CHECK_NAME=Stress test (address)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -1362,7 +1362,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (thread, actions)
CHECK_NAME=Stress test (thread)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
@ -1398,7 +1398,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (memory, actions)
CHECK_NAME=Stress test (memory)
REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse
EOF
- name: Download json reports
@ -1434,7 +1434,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_undefined
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (undefined, actions)
CHECK_NAME=Stress test (undefined)
REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse
EOF
- name: Download json reports
@ -1470,7 +1470,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (debug, actions)
CHECK_NAME=Stress test (debug)
REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse
EOF
- name: Download json reports
@ -1509,7 +1509,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=3
@ -1547,7 +1547,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=3
@ -1585,7 +1585,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, actions)
CHECK_NAME=Integration tests (asan)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=3
@ -1623,7 +1623,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=4
@ -1661,7 +1661,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=4
@ -1699,7 +1699,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=4
@ -1737,7 +1737,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (thread, actions)
CHECK_NAME=Integration tests (thread)
REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse
RUN_BY_HASH_NUM=3
RUN_BY_HASH_TOTAL=4
@ -1775,7 +1775,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=2
@ -1813,7 +1813,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
CHECK_NAME=Integration tests (release)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=2

.gitmodules

@ -259,6 +259,9 @@
[submodule "contrib/minizip-ng"]
path = contrib/minizip-ng
url = https://github.com/zlib-ng/minizip-ng
[submodule "contrib/qpl"]
path = contrib/qpl
url = https://github.com/intel/qpl.git
[submodule "contrib/wyhash"]
path = contrib/wyhash
url = https://github.com/wangyi-fudan/wyhash.git
@ -274,9 +277,6 @@
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing.git
[submodule "contrib/base-x"]
path = contrib/base-x
url = https://github.com/ClickHouse/base-x.git
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/ClickHouse/c-ares


@ -1,17 +1,18 @@
### Table of Contents
**[ClickHouse release v22.7, 2022-07-21](#226)**<br>
**[ClickHouse release v22.6, 2022-06-16](#226)**<br>
**[ClickHouse release v22.5, 2022-05-19](#225)**<br>
**[ClickHouse release v22.4, 2022-04-20](#224)**<br>
**[ClickHouse release v22.3-lts, 2022-03-17](#223)**<br>
**[ClickHouse release v22.2, 2022-02-17](#222)**<br>
**[ClickHouse release v22.1, 2022-01-18](#221)**<br>
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**<br>
**[ClickHouse release v22.7, 2022-07-21](#227)**<br/>
**[ClickHouse release v22.6, 2022-06-16](#226)**<br/>
**[ClickHouse release v22.5, 2022-05-19](#225)**<br/>
**[ClickHouse release v22.4, 2022-04-20](#224)**<br/>
**[ClickHouse release v22.3-lts, 2022-03-17](#223)**<br/>
**[ClickHouse release v22.2, 2022-02-17](#222)**<br/>
**[ClickHouse release v22.1, 2022-01-18](#221)**<br/>
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**<br/>
### <a id="227"></a> ClickHouse release 22.7, 2022-07-21
#### Upgrade Notes
* Enable setting `enable_positional_arguments` by default. It allows queries like `SELECT ... ORDER BY 1, 2` where 1, 2 are the references to the select clause. If you need to return the old behavior, disable this setting. [#38204](https://github.com/ClickHouse/ClickHouse/pull/38204) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disable `format_csv_allow_single_quotes` by default. See [#37096](https://github.com/ClickHouse/ClickHouse/issues/37096). ([Kruglov Pavel](https://github.com/Avogar)).
* `Ordinary` database engine and old storage definition syntax for `*MergeTree` tables are deprecated. By default it's not possible to create new databases with `Ordinary` engine. If `system` database has `Ordinary` engine it will be automatically converted to `Atomic` on server startup. There are settings to keep old behavior (`allow_deprecated_database_ordinary` and `allow_deprecated_syntax_for_merge_tree`), but these settings may be removed in future releases. [#38335](https://github.com/ClickHouse/ClickHouse/pull/38335) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Force rewriting comma join to inner by default (set default value `cross_to_inner_join_rewrite = 2`). To have the old behavior, set `cross_to_inner_join_rewrite = 1`. [#39326](https://github.com/ClickHouse/ClickHouse/pull/39326) ([Vladimir C](https://github.com/vdimir)). If you face any incompatibilities, you can turn this setting back.


@ -554,6 +554,16 @@ macro (clickhouse_add_executable target)
endif()
endmacro()
# When cross-compiling, all targets are built for the target platform, which is usually different from the host
# platform. This is problematic if a build artifact X (e.g. a file or an executable) is generated by running
# another executable Y previously produced in the build. This is solved by compiling and running Y for/on
# the host platform. Add target to the list:
# add_native_target(<target> ...)
set_property (GLOBAL PROPERTY NATIVE_BUILD_TARGETS)
function (add_native_target)
set_property (GLOBAL APPEND PROPERTY NATIVE_BUILD_TARGETS ${ARGV})
endfunction (add_native_target)
set(ConfigIncludePath ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.")
include_directories(${ConfigIncludePath})
@ -568,3 +578,33 @@ add_subdirectory (tests)
add_subdirectory (utils)
include (cmake/sanitize_target_link_libraries.cmake)
# Build native targets if necessary
get_property(NATIVE_BUILD_TARGETS GLOBAL PROPERTY NATIVE_BUILD_TARGETS)
if (NATIVE_BUILD_TARGETS
AND NOT(
CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME
AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR
)
)
message (STATUS "Building native targets...")
set (NATIVE_BUILD_DIR "${CMAKE_BINARY_DIR}/native")
execute_process(
COMMAND ${CMAKE_COMMAND} -E make_directory "${NATIVE_BUILD_DIR}"
COMMAND_ECHO STDOUT)
execute_process(
COMMAND ${CMAKE_COMMAND}
"-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
"-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
${CMAKE_SOURCE_DIR}
WORKING_DIRECTORY "${NATIVE_BUILD_DIR}"
COMMAND_ECHO STDOUT)
execute_process(
COMMAND ${CMAKE_COMMAND} --build "${NATIVE_BUILD_DIR}" --target ${NATIVE_BUILD_TARGETS}
COMMAND_ECHO STDOUT)
endif ()
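The new CMake logic above drives a second, host-native configure-and-build of the collected targets via `execute_process`. Roughly the same steps expressed as shell commands (a sketch only; `HOST_CC`, `HOST_CXX`, `SOURCE_DIR`, and `TARGETS` are placeholders for the corresponding CMake values, not variables defined by this commit):

```bash
# Approximate shell equivalent of the execute_process() calls above.
NATIVE_BUILD_DIR="$PWD/native"
mkdir -p "$NATIVE_BUILD_DIR"
cmake -DCMAKE_C_COMPILER="$HOST_CC" -DCMAKE_CXX_COMPILER="$HOST_CXX" \
      -S "$SOURCE_DIR" -B "$NATIVE_BUILD_DIR"
cmake --build "$NATIVE_BUILD_DIR" --target $TARGETS
```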


@ -669,18 +669,18 @@ std::string JSON::getName() const
return getString();
}
StringRef JSON::getRawString() const
std::string_view JSON::getRawString() const
{
Pos s = ptr_begin;
if (*s != '"')
throw JSONException(std::string("JSON: expected \", got ") + *s);
while (++s != ptr_end && *s != '"');
if (s != ptr_end)
return StringRef(ptr_begin + 1, s - ptr_begin - 1);
return std::string_view(ptr_begin + 1, s - ptr_begin - 1);
throw JSONException("JSON: incorrect syntax (expected end of string, found end of JSON).");
}
StringRef JSON::getRawName() const
std::string_view JSON::getRawName() const
{
return getRawString();
}


@ -136,8 +136,8 @@ public:
std::string getName() const; /// Get the name of the name-value pair.
JSON getValue() const; /// Get the value of the name-value pair.
StringRef getRawString() const;
StringRef getRawName() const;
std::string_view getRawString() const;
std::string_view getRawName() const;
/// Get the value of the element; if the element is a string, parse the value from the string; if it is neither a string nor a number, throw an exception.
double toDouble() const;


@ -1,68 +1,192 @@
#include <sys/auxv.h>
#include "atomic.h"
#include <unistd.h> // __environ
#include <sys/auxv.h>
#include <fcntl.h> // open
#include <sys/stat.h> // O_RDONLY
#include <unistd.h> // read, close
#include <stdlib.h> // ssize_t
#include <stdio.h> // perror, fprintf
#include <link.h> // ElfW
#include <errno.h>
// We don't have libc struct available here. Compute aux vector manually.
static unsigned long * __auxv = NULL;
static unsigned long __auxv_secure = 0;
#define ARRAY_SIZE(a) sizeof((a))/sizeof((a[0]))
static size_t __find_auxv(unsigned long type)
/// Suppress TSan since it is possible for this code to be called from multiple threads,
/// and initialization is safe to be done multiple times from multiple threads.
#if defined(__clang__)
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
#else
# define NO_SANITIZE_THREAD
#endif
// We don't have libc struct available here.
// Compute aux vector manually (from /proc/self/auxv).
//
// Right now there are only 51 AT_* constants,
// so 64 should be enough until this implementation is replaced with musl.
static unsigned long __auxv_procfs[64];
static unsigned long __auxv_secure = 0;
// Common
static unsigned long * __auxv_environ = NULL;
static void * volatile getauxval_func;
static unsigned long __auxv_init_environ(unsigned long type);
//
// auxv from procfs interface
//
ssize_t __retry_read(int fd, void * buf, size_t count)
{
for (;;)
{
ssize_t ret = read(fd, buf, count);
if (ret == -1)
{
if (errno == EINTR)
{
continue;
}
perror("Cannot read /proc/self/auxv");
abort();
}
return ret;
}
}
unsigned long NO_SANITIZE_THREAD __getauxval_procfs(unsigned long type)
{
if (type == AT_SECURE)
{
return __auxv_secure;
}
if (type >= ARRAY_SIZE(__auxv_procfs))
{
errno = ENOENT;
return 0;
}
return __auxv_procfs[type];
}
static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type)
{
// For debugging:
// - od -t dL /proc/self/auxv
// - LD_SHOW_AUXV= ls
int fd = open("/proc/self/auxv", O_RDONLY);
// It is possible in case of:
// - no procfs mounted
// - on android you are not able to read it unless running from shell or debugging
// - some other issues
if (fd == -1)
{
// Fallback to environ.
a_cas_p(&getauxval_func, (void *)__auxv_init_procfs, (void *)__auxv_init_environ);
return __auxv_init_environ(type);
}
ElfW(auxv_t) aux;
/// NOTE: sizeof(aux) is very small (less than PAGE_SIZE), so a partial read should not be possible.
_Static_assert(sizeof(aux) < 4096, "Unexpected sizeof(aux)");
while (__retry_read(fd, &aux, sizeof(aux)) == sizeof(aux))
{
if (aux.a_type == AT_NULL)
{
break;
}
if (aux.a_type == AT_IGNORE || aux.a_type == AT_IGNOREPPC)
{
continue;
}
if (aux.a_type >= ARRAY_SIZE(__auxv_procfs))
{
fprintf(stderr, "AT_* is out of range: %li (maximum allowed is %zu)\n", aux.a_type, ARRAY_SIZE(__auxv_procfs));
abort();
}
if (__auxv_procfs[aux.a_type])
{
/// It is possible due to race on initialization.
}
__auxv_procfs[aux.a_type] = aux.a_un.a_val;
}
close(fd);
__auxv_secure = __getauxval_procfs(AT_SECURE);
// Now we've initialized __auxv_procfs, next time getauxval() will only call __get_auxval().
a_cas_p(&getauxval_func, (void *)__auxv_init_procfs, (void *)__getauxval_procfs);
return __getauxval_procfs(type);
}
//
// auxv from environ interface
//
// NOTE: environ is available only after static initializers,
// so you cannot rely on this path if you need getauxval() earlier.
//
// A good example of such a user is the sanitizers: for instance,
// LSan will not work with __auxv_init_environ(),
// since it needs getauxval() before environ is set up.
//
static size_t NO_SANITIZE_THREAD __find_auxv(unsigned long type)
{
size_t i;
for (i = 0; __auxv[i]; i += 2)
for (i = 0; __auxv_environ[i]; i += 2)
{
if (__auxv[i] == type)
if (__auxv_environ[i] == type)
{
return i + 1;
}
}
return (size_t) -1;
}
unsigned long __getauxval(unsigned long type)
unsigned long NO_SANITIZE_THREAD __getauxval_environ(unsigned long type)
{
if (type == AT_SECURE)
return __auxv_secure;
if (__auxv)
if (__auxv_environ)
{
size_t index = __find_auxv(type);
if (index != ((size_t) -1))
return __auxv[index];
return __auxv_environ[index];
}
errno = ENOENT;
return 0;
}
static void * volatile getauxval_func;
static unsigned long __auxv_init(unsigned long type)
static unsigned long NO_SANITIZE_THREAD __auxv_init_environ(unsigned long type)
{
if (!__environ)
{
// __environ is not initialized yet so we can't initialize __auxv right now.
// __environ is not initialized yet so we can't initialize __auxv_environ right now.
// That normally happens only when getauxval() is called from some sanitizer's internal code.
errno = ENOENT;
return 0;
}
// Initialize __auxv and __auxv_secure.
// Initialize __auxv_environ and __auxv_secure.
size_t i;
for (i = 0; __environ[i]; i++);
__auxv = (unsigned long *) (__environ + i + 1);
__auxv_environ = (unsigned long *) (__environ + i + 1);
size_t secure_idx = __find_auxv(AT_SECURE);
if (secure_idx != ((size_t) -1))
__auxv_secure = __auxv[secure_idx];
__auxv_secure = __auxv_environ[secure_idx];
// Now we've initialized __auxv, next time getauxval() will only call __get_auxval().
a_cas_p(&getauxval_func, (void *)__auxv_init, (void *)__getauxval);
// Now we need to switch to __getauxval_environ for all later calls, since
// everything is initialized.
a_cas_p(&getauxval_func, (void *)__auxv_init_environ, (void *)__getauxval_environ);
return __getauxval(type);
return __getauxval_environ(type);
}
// First time getauxval() will call __auxv_init().
static void * volatile getauxval_func = (void *)__auxv_init;
// Callchain:
// - __auxv_init_procfs -> __getauxval_procfs
// - __auxv_init_procfs -> __auxv_init_environ -> __getauxval_environ
static void * volatile getauxval_func = (void *)__auxv_init_procfs;
unsigned long getauxval(unsigned long type)
{

View File

@ -156,8 +156,8 @@ endif()
add_contrib (sqlite-cmake sqlite-amalgamation)
add_contrib (s2geometry-cmake s2geometry)
add_contrib (base-x-cmake base-x)
add_contrib(c-ares-cmake c-ares)
add_contrib (c-ares-cmake c-ares)
add_contrib (qpl-cmake qpl)
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
# Some third-party projects may override the CMAKE_FOLDER or FOLDER property of their targets, so they would not appear

2
contrib/avro vendored

@ -1 +1 @@
Subproject commit e43c46e87fd32eafdc09471e95344555454c5ef8
Subproject commit 7832659ec986075d560f930c288e973c64679552

1
contrib/base-x vendored

@ -1 +0,0 @@
Subproject commit a85f98fb4ed52c2f4029a4b6ac1ef0bafdfc56f5

View File

@ -1,28 +0,0 @@
option (ENABLE_BASEX "Enable base-x" ${ENABLE_LIBRARIES})
if (NOT ENABLE_BASEX)
message(STATUS "Not using base-x")
return()
endif()
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base-x")
set (SRCS
${LIBRARY_DIR}/base_x.hh
${LIBRARY_DIR}/uinteger_t.hh
)
add_library(_base-x INTERFACE)
target_include_directories(_base-x SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/base-x")
if (XCODE OR XCODE_VERSION)
# https://gitlab.kitware.com/cmake/cmake/issues/17457
# Some native build systems may not like targets that have only object files, so consider adding at least one real source file
# This applies to Xcode.
if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "")
endif ()
target_sources(_base-x PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
endif ()
add_library(ch_contrib::base-x ALIAS _base-x)

2
contrib/grpc vendored

@ -1 +1 @@
Subproject commit 5e23e96c0c02e451dbb291cf9f66231d02b6cdb6
Subproject commit 3f975ecab377cd5f739af780566596128f17bb74

1
contrib/qpl vendored Submodule

@ -0,0 +1 @@
Subproject commit cdc8442f7a5e7a6ff6eea39c69665e0c5034d85d

View File

@ -0,0 +1,322 @@
## The Intel® QPL provides high-performance implementations of data processing functions for an existing hardware accelerator, and/or a software path in case the hardware accelerator is not available.
if (OS_LINUX AND ARCH_AMD64 AND (ENABLE_AVX2 OR ENABLE_AVX512))
option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES})
elseif(ENABLE_QPL)
message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with avx2/avx512 support")
endif()
if (NOT ENABLE_QPL)
message(STATUS "Not using QPL")
return()
endif()
set (QPL_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl")
set (QPL_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl/sources")
set (QPL_BINARY_DIR "${ClickHouse_BINARY_DIR}/build/contrib/qpl")
set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake")
set (EFFICIENT_WAIT ON)
set (BLOCK_ON_FAULT ON)
set (LOG_HW_INIT OFF)
set (SANITIZE_MEMORY OFF)
set (SANITIZE_THREADS OFF)
set (LIB_FUZZING_ENGINE OFF)
function(GetLibraryVersion _content _outputVar)
string(REGEX MATCHALL "Qpl VERSION (.+) LANGUAGES" VERSION_REGEX "${_content}")
SET(${_outputVar} ${CMAKE_MATCH_1} PARENT_SCOPE)
endfunction()
FILE(READ "${QPL_PROJECT_DIR}/CMakeLists.txt" HEADER_CONTENT)
GetLibraryVersion("${HEADER_CONTENT}" QPL_VERSION)
message(STATUS "Intel QPL version: ${QPL_VERSION}")
# There are 5 source subdirectories under $QPL_SRC_DIR: isal, c_api, core-sw, middle-layer, core-iaa.
# They produce 6 library targets: isal, isal_asm, qplcore_px, qplcore_avx512, core_iaa, middle_layer_lib.
# ch_contrib::qpl is the result of linking these library targets into the _qpl static library.
include("${QPL_PROJECT_DIR}/cmake/CompileOptions.cmake")
# check nasm compiler
include(CheckLanguage)
check_language(ASM_NASM)
if(NOT CMAKE_ASM_NASM_COMPILER)
message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because NASM compiler can not be found!")
endif()
# [SUBDIR]isal
enable_language(ASM_NASM)
set(ISAL_C_SRC ${QPL_SRC_DIR}/isal/igzip/adler32_base.c
${QPL_SRC_DIR}/isal/igzip/huff_codes.c
${QPL_SRC_DIR}/isal/igzip/hufftables_c.c
${QPL_SRC_DIR}/isal/igzip/igzip.c
${QPL_SRC_DIR}/isal/igzip/igzip_base.c
${QPL_SRC_DIR}/isal/igzip/flatten_ll.c
${QPL_SRC_DIR}/isal/igzip/encode_df.c
${QPL_SRC_DIR}/isal/igzip/igzip_icf_base.c
${QPL_SRC_DIR}/isal/igzip/igzip_inflate.c
${QPL_SRC_DIR}/isal/igzip/igzip_icf_body.c
${QPL_SRC_DIR}/isal/crc/crc_base.c
${QPL_SRC_DIR}/isal/crc/crc64_base.c)
set(ISAL_ASM_SRC ${QPL_SRC_DIR}/isal/igzip/igzip_body.asm
${QPL_SRC_DIR}/isal/igzip/igzip_gen_icf_map_lh1_04.asm
${QPL_SRC_DIR}/isal/igzip/igzip_gen_icf_map_lh1_06.asm
${QPL_SRC_DIR}/isal/igzip/igzip_decode_block_stateless_04.asm
${QPL_SRC_DIR}/isal/igzip/igzip_finish.asm
${QPL_SRC_DIR}/isal/igzip/encode_df_04.asm
${QPL_SRC_DIR}/isal/igzip/encode_df_06.asm
${QPL_SRC_DIR}/isal/igzip/igzip_decode_block_stateless_01.asm
${QPL_SRC_DIR}/isal/igzip/proc_heap.asm
${QPL_SRC_DIR}/isal/igzip/igzip_icf_body_h1_gr_bt.asm
${QPL_SRC_DIR}/isal/igzip/igzip_icf_finish.asm
${QPL_SRC_DIR}/isal/igzip/igzip_inflate_multibinary.asm
${QPL_SRC_DIR}/isal/igzip/igzip_update_histogram_01.asm
${QPL_SRC_DIR}/isal/igzip/igzip_update_histogram_04.asm
${QPL_SRC_DIR}/isal/igzip/rfc1951_lookup.asm
${QPL_SRC_DIR}/isal/igzip/adler32_sse.asm
${QPL_SRC_DIR}/isal/igzip/adler32_avx2_4.asm
${QPL_SRC_DIR}/isal/igzip/igzip_deflate_hash.asm
${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_04.asm
${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_06.asm
${QPL_SRC_DIR}/isal/igzip/igzip_multibinary.asm
${QPL_SRC_DIR}/isal/igzip/stdmac.asm
${QPL_SRC_DIR}/isal/crc/crc_multibinary.asm
${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8.asm
${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8_02.asm
${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by16_10.asm
${QPL_SRC_DIR}/isal/crc/crc32_ieee_01.asm
${QPL_SRC_DIR}/isal/crc/crc32_ieee_02.asm
${QPL_SRC_DIR}/isal/crc/crc32_ieee_by4.asm
${QPL_SRC_DIR}/isal/crc/crc32_ieee_by16_10.asm
${QPL_SRC_DIR}/isal/crc/crc32_iscsi_00.asm
${QPL_SRC_DIR}/isal/crc/crc32_iscsi_01.asm
${QPL_SRC_DIR}/isal/crc/crc32_iscsi_by16_10.asm)
# Adding ISA-L library target
add_library(isal OBJECT ${ISAL_C_SRC})
add_library(isal_asm OBJECT ${ISAL_ASM_SRC})
# Setting external and internal interfaces for ISA-L library
target_include_directories(isal
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/isal/include>
PRIVATE ${QPL_SRC_DIR}/isal/include
PUBLIC ${QPL_SRC_DIR}/isal/igzip)
target_compile_options(isal PRIVATE
"$<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}>"
"$<$<CONFIG:Debug>:>"
"$<$<CONFIG:Release>:>")
target_compile_options(isal_asm PUBLIC "-I${QPL_SRC_DIR}/isal/include/"
PUBLIC "-I${QPL_SRC_DIR}/isal/igzip/"
PUBLIC "-I${QPL_SRC_DIR}/isal/crc/"
PUBLIC "-DQPL_LIB")
# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available".
# AS_FEATURE_LEVEL=5 means "Check SIMD capabilities of the target system at runtime and use up to AVX2 if available".
# HAVE_AS_KNOWS_AVX512 means rely on AVX512 being available on the target system.
if (ENABLE_AVX512)
target_compile_options(isal_asm PUBLIC "-DHAVE_AS_KNOWS_AVX512" "-DAS_FEATURE_LEVEL=10")
else()
target_compile_options(isal_asm PUBLIC "-DAS_FEATURE_LEVEL=5")
endif()
# We must remove "-fno-sanitize=undefined" from COMPILE_OPTIONS here.
# Otherwise the nasm compiler would fail because it does not recognize "-fno-sanitize=undefined".
if (SANITIZE STREQUAL "undefined")
get_target_property(target_options isal_asm COMPILE_OPTIONS)
list(REMOVE_ITEM target_options "-fno-sanitize=undefined")
set_property(TARGET isal_asm PROPERTY COMPILE_OPTIONS ${target_options})
endif()
target_compile_definitions(isal PUBLIC
QPL_LIB
NDEBUG)
# [SUBDIR]core-sw
# Two libraries, qplcore_avx512 and qplcore_px, are created for the SW fallback; they are implemented with AVX512 and non-AVX512 instructions respectively.
# The upper-level QPL API checks the SIMD capabilities of the target system at runtime and decides whether to call the AVX512 or the non-AVX512 function.
# Hence, we don't need to put qplcore_avx512 under an ENABLE_AVX512 CMake switch here.
# If we did that, undefined symbol errors would occur, because both the AVX512 and the non-AVX512 functions are referenced by the QPL API.
# PLATFORM=2 means AVX512 implementation; PLATFORM=0 means non-AVX512 implementation.
# Find Core Sources
file(GLOB SOURCES
${QPL_SRC_DIR}/core-sw/src/checksums/*.c
${QPL_SRC_DIR}/core-sw/src/filtering/*.c
${QPL_SRC_DIR}/core-sw/src/other/*.c
${QPL_SRC_DIR}/core-sw/src/compression/*.c)
file(GLOB DATA_SOURCES
${QPL_SRC_DIR}/core-sw/src/data/*.c)
# Create avx512 library
add_library(qplcore_avx512 OBJECT ${SOURCES})
target_compile_definitions(qplcore_avx512 PRIVATE PLATFORM=2)
target_include_directories(qplcore_avx512
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/compression/include>
PRIVATE $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(qplcore_avx512 PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>)
target_link_libraries(qplcore_avx512 ${CMAKE_DL_LIBS} isal)
target_compile_options(qplcore_avx512
PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
PRIVATE -march=skylake-avx512
PRIVATE "$<$<CONFIG:Debug>:>"
PRIVATE "$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>")
target_compile_definitions(qplcore_avx512 PUBLIC QPL_BADARG_CHECK)
#
# Create px library
#
#set(CMAKE_INCLUDE_CURRENT_DIR ON)
# Create library
add_library(qplcore_px OBJECT ${SOURCES} ${DATA_SOURCES})
target_compile_definitions(qplcore_px PRIVATE PLATFORM=0)
target_include_directories(qplcore_px
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/compression/include>
PRIVATE $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(qplcore_px PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>)
target_link_libraries(qplcore_px isal ${CMAKE_DL_LIBS})
target_compile_options(qplcore_px
PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
PRIVATE "$<$<CONFIG:Debug>:>"
PRIVATE "$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>")
target_compile_definitions(qplcore_px PUBLIC QPL_BADARG_CHECK)
# [SUBDIR]core-iaa
file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c
${QPL_SRC_DIR}/core-iaa/sources/aecs/*.cpp
${QPL_SRC_DIR}/core-iaa/sources/driver_loader/*.c
${QPL_SRC_DIR}/core-iaa/sources/driver_loader/*.cpp
${QPL_SRC_DIR}/core-iaa/sources/descriptors/*.c
${QPL_SRC_DIR}/core-iaa/sources/descriptors/*.cpp
${QPL_SRC_DIR}/core-iaa/sources/bit_rev.c)
# Create library
add_library(core_iaa OBJECT ${HW_PATH_SRC})
target_include_directories(core_iaa
PRIVATE ${UUID_DIR}
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/include>
PRIVATE $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/sources/include>
PRIVATE $<TARGET_PROPERTY:qplcore_avx512,INTERFACE_INCLUDE_DIRECTORIES>)
target_compile_options(core_iaa
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>>)
target_compile_features(core_iaa PRIVATE c_std_11)
target_compile_definitions(core_iaa PRIVATE QPL_BADARG_CHECK
PRIVATE $<$<BOOL:${BLOCK_ON_FAULT}>: BLOCK_ON_FAULT_ENABLED>
PRIVATE $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>)
# [SUBDIR]middle-layer
generate_unpack_kernel_arrays(${QPL_BINARY_DIR})
file(GLOB MIDDLE_LAYER_SRC
${QPL_SRC_DIR}/middle-layer/analytics/*.cpp
${QPL_SRC_DIR}/middle-layer/c_wrapper/*.cpp
${QPL_SRC_DIR}/middle-layer/checksum/*.cpp
${QPL_SRC_DIR}/middle-layer/common/*.cpp
${QPL_SRC_DIR}/middle-layer/compression/*.cpp
${QPL_SRC_DIR}/middle-layer/compression/*/*.cpp
${QPL_SRC_DIR}/middle-layer/compression/*/*/*.cpp
${QPL_SRC_DIR}/middle-layer/dispatcher/*.cpp
${QPL_SRC_DIR}/middle-layer/other/*.cpp
${QPL_SRC_DIR}/middle-layer/util/*.cpp
${QPL_SRC_DIR}/middle-layer/inflate/*.cpp
${QPL_SRC_DIR}/core-iaa/sources/accelerator/*.cpp) # todo
file(GLOB GENERATED_PX_TABLES_SRC ${QPL_BINARY_DIR}/generated/px_*.cpp)
file(GLOB GENERATED_AVX512_TABLES_SRC ${QPL_BINARY_DIR}/generated/avx512_*.cpp)
add_library(middle_layer_lib OBJECT
${GENERATED_PX_TABLES_SRC}
${GENERATED_AVX512_TABLES_SRC}
${MIDDLE_LAYER_SRC})
target_compile_options(middle_layer_lib
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>>
PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>)
target_compile_definitions(middle_layer_lib
PUBLIC QPL_VERSION="${QPL_VERSION}"
PUBLIC $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>
PUBLIC $<$<BOOL:${EFFICIENT_WAIT}>:QPL_EFFICIENT_WAIT>
PUBLIC QPL_BADARG_CHECK)
set_source_files_properties(${GENERATED_PX_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=0)
set_source_files_properties(${GENERATED_AVX512_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=2)
target_include_directories(middle_layer_lib
PRIVATE ${UUID_DIR}
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/middle-layer>
PUBLIC $<TARGET_PROPERTY:_qpl,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:qplcore_px,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:qplcore_avx512,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:core_iaa,INTERFACE_INCLUDE_DIRECTORIES>)
target_compile_definitions(middle_layer_lib PUBLIC -DQPL_LIB)
# [SUBDIR]c_api
file(GLOB_RECURSE QPL_C_API_SRC
${QPL_SRC_DIR}/c_api/*.c
${QPL_SRC_DIR}/c_api/*.cpp)
add_library(_qpl STATIC ${QPL_C_API_SRC}
$<TARGET_OBJECTS:middle_layer_lib>
$<TARGET_OBJECTS:isal>
$<TARGET_OBJECTS:isal_asm>
$<TARGET_OBJECTS:qplcore_px>
$<TARGET_OBJECTS:qplcore_avx512>
$<TARGET_OBJECTS:core_iaa>
$<TARGET_OBJECTS:middle_layer_lib>)
target_include_directories(_qpl
PUBLIC $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include/>
PRIVATE $<TARGET_PROPERTY:middle_layer_lib,INTERFACE_INCLUDE_DIRECTORIES>
PRIVATE $<BUILD_INTERFACE:${QPL_SRC_DIR}/c_api>)
target_compile_options(_qpl
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>>
PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>)
target_compile_definitions(_qpl
PRIVATE -DQPL_LIB
PRIVATE -DQPL_BADARG_CHECK
PUBLIC -DENABLE_QPL_COMPRESSION)
target_link_libraries(_qpl
PRIVATE ${CMAKE_DL_LIBS})
add_library (ch_contrib::qpl ALIAS _qpl)
target_include_directories(_qpl SYSTEM BEFORE PUBLIC "${QPL_PROJECT_DIR}/include")

View File

@ -0,0 +1,4 @@
#ifndef _QPL_UUID_UUID_H
#define _QPL_UUID_UUID_H
typedef unsigned char uuid_t[16];
#endif /* _QPL_UUID_UUID_H */

View File

@ -51,6 +51,7 @@ RUN apt-get update \
rename \
software-properties-common \
tzdata \
nasm \
--yes --no-install-recommends \
&& apt-get clean

View File

@ -55,6 +55,7 @@ RUN apt-get update \
pkg-config \
tzdata \
pv \
nasm \
--yes --no-install-recommends
# Sanitizer options for services (clickhouse-server)

View File

@ -72,6 +72,7 @@ RUN apt-get update \
tzdata \
unixodbc \
file \
nasm \
--yes --no-install-recommends
RUN pip3 install numpy scipy pandas Jinja2

View File

@ -17,7 +17,9 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
shellcheck \
yamllint \
&& pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff pylint==2.6.2
&& pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff pylint==2.6.2 \
&& apt-get clean \
&& rm -rf /root/.cache/pip
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH

View File

@ -40,10 +40,10 @@ def process_result(result_folder):
def write_results(results_file, status_file, results, status):
with open(results_file, "w") as f:
with open(results_file, "w", encoding="utf-8") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, "w") as f:
with open(status_file, "w", encoding="utf-8") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
@ -53,9 +53,10 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of style check"
)
parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
default_dir = "/test_output"
parser.add_argument("--in-results-dir", default=default_dir)
parser.add_argument("--out-results-file", default=f"{default_dir}/test_results.tsv")
parser.add_argument("--out-status-file", default=f"{default_dir}/check_status.tsv")
args = parser.parse_args()
state, description, test_results = process_result(args.in_results_dir)

View File

@ -0,0 +1,36 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.6.4.35-stable FIXME as compared to v22.6.3.35-stable
#### Build/Testing/Packaging Improvement
* Backported in [#38822](https://github.com/ClickHouse/ClickHouse/issues/38822): Change `all|noarch` packages to architecture-dependent, fix some documentation for it, and push aarch64|arm64 packages to artifactory and release assets. Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#38242](https://github.com/ClickHouse/ClickHouse/issues/38242): Fix possible crash in `Distributed` async insert in case of removing a replica from config. [#38029](https://github.com/ClickHouse/ClickHouse/pull/38029) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#38865](https://github.com/ClickHouse/ClickHouse/issues/38865): Fix s3 seekable reads with parallel read buffer. (Affected memory usage during query). Closes [#38258](https://github.com/ClickHouse/ClickHouse/issues/38258). [#38802](https://github.com/ClickHouse/ClickHouse/pull/38802) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#38853](https://github.com/ClickHouse/ClickHouse/issues/38853): Update `simdjson`. This fixes [#38621](https://github.com/ClickHouse/ClickHouse/issues/38621). [#38838](https://github.com/ClickHouse/ClickHouse/pull/38838) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#38942](https://github.com/ClickHouse/ClickHouse/issues/38942): Fix settings profile with seconds unit. [#38896](https://github.com/ClickHouse/ClickHouse/pull/38896) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#39063](https://github.com/ClickHouse/ClickHouse/issues/39063): Any allocations inside OvercommitTracker may lead to deadlock. Logging was not very informative so it's easier just to remove logging. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794). [#39030](https://github.com/ClickHouse/ClickHouse/pull/39030) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#39077](https://github.com/ClickHouse/ClickHouse/issues/39077): Fix bug in filesystem cache that could happen in some corner case which coincided with cache capacity hitting the limit. Closes [#39066](https://github.com/ClickHouse/ClickHouse/issues/39066). [#39070](https://github.com/ClickHouse/ClickHouse/pull/39070) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#39151](https://github.com/ClickHouse/ClickHouse/issues/39151): Fix error `Block structure mismatch` which could happen for INSERT into table with attached MATERIALIZED VIEW and enabled setting `extremes = 1`. Closes [#29759](https://github.com/ClickHouse/ClickHouse/issues/29759) and [#38729](https://github.com/ClickHouse/ClickHouse/issues/38729). [#39125](https://github.com/ClickHouse/ClickHouse/pull/39125) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#39275](https://github.com/ClickHouse/ClickHouse/issues/39275): Fixed error `Not found column Type in block` in selects with `PREWHERE` and read-in-order optimizations. [#39157](https://github.com/ClickHouse/ClickHouse/pull/39157) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#39371](https://github.com/ClickHouse/ClickHouse/issues/39371): Declare RabbitMQ queue without default arguments `x-max-length` and `x-overflow`. [#39259](https://github.com/ClickHouse/ClickHouse/pull/39259) ([rnbondarenko](https://github.com/rnbondarenko)).
* Backported in [#39352](https://github.com/ClickHouse/ClickHouse/issues/39352): Fix incorrect query for fetching PostgreSQL tables in the PostgreSQL database engine. Closes [#33502](https://github.com/ClickHouse/ClickHouse/issues/33502). [#39283](https://github.com/ClickHouse/ClickHouse/pull/39283) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### NO CL CATEGORY
* Backported in [#38685](https://github.com/ClickHouse/ClickHouse/issues/38685):. [#38449](https://github.com/ClickHouse/ClickHouse/pull/38449) ([Maksim Kita](https://github.com/kitaisreal)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Use native Map type for OpenTelemetry attributes [#38814](https://github.com/ClickHouse/ClickHouse/pull/38814) ([Ilya Yatsishin](https://github.com/qoega)).
* Retry docker buildx commands with progressive sleep in between [#38898](https://github.com/ClickHouse/ClickHouse/pull/38898) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add docker_server.py running to backport and release CIs [#39011](https://github.com/ClickHouse/ClickHouse/pull/39011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix meilisearch tests [#39110](https://github.com/ClickHouse/ClickHouse/pull/39110) ([Kseniia Sumarokova](https://github.com/kssenii)).

View File

@ -119,16 +119,9 @@ On CentOS, RedHat run `sudo yum install cmake ninja-build`.
If you use Arch or Gentoo, you probably know how to install CMake yourself.
For installing CMake and Ninja on Mac OS X first install Homebrew and then install everything else via brew:
/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
brew install cmake ninja
Next, check the version of CMake: `cmake --version`. If it is below 3.12, you should install a newer version from the website: https://cmake.org/download/.
## C++ Compiler {#c-compiler}
Compilers Clang starting from version 11 is supported for building ClickHouse.
Clang starting from version 12 is supported for building ClickHouse.
Clang should be used instead of gcc. Though, our continuous integration (CI) platform runs checks for about a dozen build combinations.
@ -138,9 +131,6 @@ On Ubuntu/Debian you can use the automatic installation script (check [official
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
```
Mac OS X build is also supported. Just run `brew install llvm`
## The Building Process {#the-building-process}
Now that you are ready to build ClickHouse we recommend you to create a separate directory `build` inside `ClickHouse` that will contain all of the build artefacts:

View File

@ -692,9 +692,7 @@ auto s = std::string{"Hello"};
**1.** Virtual inheritance is not used.
**2.** Exception specifiers from C++03 are not used.
**3.** Constructs which have convenient syntactic sugar in modern C++, e.g.
**2.** Constructs which have convenient syntactic sugar in modern C++, e.g.
```
// Traditional way without syntactic sugar
@ -745,7 +743,7 @@ But other things being equal, cross-platform or portable code is preferred.
**2.** Language: C++20 (see the list of available [C++20 features](https://en.cppreference.com/w/cpp/compiler_support#C.2B.2B20_features)).
**3.** Compiler: `clang`. At this time (April 2021), the code is compiled using clang version 11. (It can also be compiled using `gcc` version 10, but it's untested and not suitable for production usage).
**3.** Compiler: `clang`. At the time of writing (July 2022), the code is compiled using clang version >= 12. (It can also be compiled using `gcc`, but it's untested and not suitable for production usage).
The standard library is used (`libc++`).
@ -755,7 +753,7 @@ The standard library is used (`libc++`).
The CPU instruction set is the minimum supported set among our servers. Currently, it is SSE 4.2.
**6.** Use `-Wall -Wextra -Werror` compilation flags. Also `-Weverything` is used with few exceptions.
**6.** Use `-Wall -Wextra -Werror -Weverything` compilation flags with a few exceptions.
**7.** Use static linking with all libraries except those that are difficult to connect to statically (see the output of the `ldd` command).

View File

@ -81,11 +81,11 @@ $ ./src/unit_tests_dbms --gtest_filter=LocalAddress*
## Performance Tests {#performance-tests}
Performance tests allow to measure and compare performance of some isolated part of ClickHouse on synthetic queries. Tests are located at `tests/performance`. Each test is represented by `.xml` file with description of test case. Tests are run with `docker/test/performance-comparison` tool . See the readme file for invocation.
Performance tests allow measuring and comparing the performance of some isolated part of ClickHouse on synthetic queries. Performance tests are located at `tests/performance/`. Each test is represented by an `.xml` file with a description of the test case. Tests are run with the `docker/test/performance-comparison` tool. See the readme file for invocation.
Each test runs one or multiple queries (possibly with combinations of parameters) in a loop.
If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. It always makes sense to use `perf top` or other `perf` tools during your tests.
If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. Also, it is recommended to write performance tests when you add or modify SQL functions which are relatively isolated and not too obscure. It always makes sense to use `perf top` or other `perf` tools during your tests.
## Test Tools and Scripts {#test-tools-and-scripts}

View File

@ -482,9 +482,9 @@ For example:
## Projections {#projections}
Projections are like [materialized views](../../../sql-reference/statements/create/view.md#materialized) but defined at the part level. They provide consistency guarantees along with automatic usage in queries.
Projections are an experimental feature. To enable them you must set the [allow_experimental_projection_optimization](../../../operations/settings/settings.md#allow-experimental-projection-optimization) setting to `1`. See also the [force_optimize_projection](../../../operations/settings/settings.md#force-optimize-projection) setting.
::: note
When you are implementing projections you should also consider the [force_optimize_projection](../../../operations/settings/settings.md#force-optimize-projection) setting.
:::
Projections are not supported in the `SELECT` statements with the [FINAL](../../../sql-reference/statements/select/from.md#select-from-final) modifier.
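A hedged sketch of how this can look in practice; the `visits` table, its columns, and the projection name are invented for this example:

```sql
-- Hypothetical table with a projection declared at creation time.
CREATE TABLE visits
(
    user_id UInt64,
    event_date Date,
    duration UInt32,
    PROJECTION duration_by_user
    (
        SELECT user_id, sum(duration)
        GROUP BY user_id
    )
)
ENGINE = MergeTree
ORDER BY event_date;

-- With the experimental setting enabled, the optimizer may answer this
-- aggregation from the projection instead of scanning the full table.
SELECT user_id, sum(duration)
FROM visits
GROUP BY user_id
SETTINGS allow_experimental_projection_optimization = 1;
```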
### Projection Query {#projection-query}

View File

@ -194,18 +194,25 @@ Differs from the `TabSeparated` format in that the column names are written in t
During parsing, the first row is expected to contain the column names. You can use column names to determine their position and to check their correctness.
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
This format is also available under the name `TSVWithNames`.
## TabSeparatedWithNamesAndTypes {#tabseparatedwithnamesandtypes}
Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row.
The first row with names is processed the same way as in `TabSeparatedWithNames` format.
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
This format is also available under the name `TSVWithNamesAndTypes`.
@ -451,10 +458,24 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## CSVWithNamesAndTypes {#csvwithnamesandtypes}
Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
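For illustration, a hedged sketch of how the two settings mentioned above interact when importing `CSVWithNames` data; the `dst` table and the sample rows are invented for this example:

```sql
CREATE TABLE dst (a UInt32, b String) ENGINE = Memory;

SET input_format_with_names_use_header = 1;
SET input_format_skip_unknown_fields = 1;

-- Columns are matched by the names in the header row, not by position;
-- the unknown column `c` is skipped instead of causing an error.
INSERT INTO dst FORMAT CSVWithNames
"b","a","c"
"hello",1,"ignored"
```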
## CustomSeparated {#format-customseparated}
Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](../operations/settings/settings.md#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](../operations/settings/settings.md#format_custom_field_delimiter), [format_custom_row_before_delimiter](../operations/settings/settings.md#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](../operations/settings/settings.md#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](../operations/settings/settings.md#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](../operations/settings/settings.md#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](../operations/settings/settings.md#format_custom_result_after_delimiter) settings, not from format strings.
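A hedged sketch of how a few of these settings combine; the delimiter values below are arbitrary choices made for illustration only:

```sql
SELECT number AS x, toString(number) AS s
FROM numbers(3)
SETTINGS
    format_custom_escaping_rule = 'CSV',
    format_custom_field_delimiter = ';',
    format_custom_row_before_delimiter = '[',
    format_custom_row_after_delimiter = ']'
FORMAT CustomSeparated;

-- Each output row then looks roughly like [0;"0"], with CSV escaping applied to the fields.
```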
@ -465,10 +486,24 @@ There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [Templat
Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## CustomSeparatedWithNamesAndTypes {#customseparatedwithnamesandtypes}
Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
## SQLInsert {#sqlinsert}
Outputs data as a sequence of `INSERT INTO table (columns...) VALUES (...), (...) ...;` statements.
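A small, hedged sketch of what this produces; the exact output also depends on the SQLInsert-related output settings, which are left at their defaults here:

```sql
SELECT number AS x, toString(number) AS s
FROM numbers(3)
FORMAT SQLInsert;

-- Produces something along the lines of:
-- INSERT INTO table (x, s) VALUES (0, '0'), (1, '1'), (2, '2');
```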
@ -911,18 +946,46 @@ Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yie
Differs from `JSONCompactEachRow` format in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## JSONCompactEachRowWithNamesAndTypes {#jsoncompacteachrowwithnamesandtypes}
Differs from `JSONCompactEachRow` format in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
## JSONCompactStringsEachRowWithNames {#jsoncompactstringseachrowwithnames}
Differs from `JSONCompactStringsEachRow` in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## JSONCompactStringsEachRowWithNamesAndTypes {#jsoncompactstringseachrowwithnamesandtypes}
Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
```json
["num", "str", "arr"]
["Int32", "String", "Array(UInt8)"]
@ -1199,6 +1262,12 @@ Similar to [RowBinary](#rowbinary), but with added header:
- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N)
- N `String`s specifying column names
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::
## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes}
Similar to [RowBinary](#rowbinary), but with added header:
@ -1207,6 +1276,14 @@ Similar to [RowBinary](#rowbinary), but with added header:
- N `String`s specifying column names
- N `String`s specifying column types
:::warning
If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
## Values {#data-format-values}
Prints every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in a decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces aren't inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../sql-reference/syntax.md) is represented as `NULL`.
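A small illustration; the expected output is sketched in the comment rather than copied from a real run:

```sql
SELECT number AS x, ['a', 'b'] AS arr, NULL AS n
FROM numbers(2)
FORMAT Values;

-- Roughly: (0,['a','b'],NULL),(1,['a','b'],NULL)
```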

View File

@ -67,7 +67,7 @@ Features:
### Grafana {#grafana}
[Grafana](https://grafana.com/grafana/plugins/vertamedia-clickhouse-datasource) is a platform for monitoring and visualization.
[Grafana](https://grafana.com/grafana/plugins/grafana-clickhouse-datasource/) is a platform for monitoring and visualization.
"Grafana allows you to query, visualize, alert on and understand your metrics no matter where they are stored. Create, explore, and share dashboards with your team and foster a data driven culture. Trusted and loved by the community" &mdash; grafana.com.

View File

@ -45,7 +45,7 @@ Configuration template:
- `min_part_size` The minimum size of a data part.
- `min_part_size_ratio` The ratio of the data part size to the table size.
- `method` Compression method. Acceptable values: `lz4`, `lz4hc`, `zstd`.
- `method` Compression method. Acceptable values: `lz4`, `lz4hc`, `zstd`, `deflate_qpl`.
- `level` Compression level. See [Codecs](../../sql-reference/statements/create/table.md#create-query-general-purpose-codecs).
You can configure multiple `<case>` sections.

View File

@ -836,7 +836,7 @@ Result:
## now
Returns the current date and time.
Returns the current date and time at the moment of query analysis. The function is a constant expression.
**Syntax**
@ -884,14 +884,20 @@ Result:
└──────────────────────┘
```
## nowInBlock
Returns the current date and time at the moment of processing of each block of data. In contrast to the function `now`, it is not a constant expression, and the returned value will be different in different blocks for long-running queries.
It makes sense to use this function to generate the current time in long-running INSERT SELECT queries.
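A hedged sketch that makes the difference observable: with one row per block and a one-second sleep per block, `now()` stays the same across the whole result while `nowInBlock()` advances:

```sql
SELECT
    now(),
    nowInBlock(),
    sleep(1)
FROM system.numbers
LIMIT 3
SETTINGS max_block_size = 1;
```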
## today
Accepts zero arguments and returns the current date at one of the moments of request execution.
Accepts zero arguments and returns the current date at one of the moments of query analysis.
The same as toDate(now()).
## yesterday
Accepts zero arguments and returns yesterday's date at one of the moments of request execution.
Accepts zero arguments and returns yesterday's date at one of the moments of query analysis.
The same as today() - 1.
## timeSlot

View File

@ -494,22 +494,21 @@ If the s string is non-empty and does not contain the c character at
Returns the string s that was converted from the encoding in from to the encoding in to.
## Base58Encode(plaintext[, alphabet_name]), Base58Decode(encoded_text[, alphabet_name])
## Base58Encode(plaintext), Base58Decode(encoded_text)
Accepts a String and encodes/decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using specified alphabet.
Accepts a String and encodes/decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using "Bitcoin" alphabet.
**Syntax**
```sql
base58Encode(decoded[, alphabet_name])
base58Decode(encoded[, alphabet_name])
base58Encode(decoded)
base58Decode(encoded)
```
**Arguments**
- `decoded` — [String](../../sql-reference/data-types/string.md) column or constant.
- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid base58-encoded value, an exception is thrown.
- `alphabet_name` — String constant. Specifies alphabet used for encoding. Possible values: `gmp`, `bitcoin`, `ripple`, `flickr`. Default: `bitcoin`.
**Returned value**
@ -522,17 +521,17 @@ Type: [String](../../sql-reference/data-types/string.md).
Query:
``` sql
SELECT base58Encode('encode', 'flickr');
SELECT base58Decode('izCFiDUY', 'ripple');
SELECT base58Encode('Encoded');
SELECT base58Decode('3dc8KtHrwM');
```
Result:
```text
┌─base58Encode('encode', 'flickr')─┐
SvyTHb1D
┌─encodeBase58('Encoded')─┐
3dc8KtHrwM
└──────────────────────────────────┘
┌─base58Decode('izCFiDUY', 'ripple')─┐
decode
┌─decodeBase58('3dc8KtHrwM')─┐
Encoded
└────────────────────────────────────┘
```

View File

@ -96,7 +96,7 @@ For more information, see the link: [RE2](https://github.com/google/re2/blob/mas
## translate(s, from, to)
The function replaces characters in the string s in accordance with one-to-one character mapping defined by from and to strings. from and to must be ASCII strings of the same size. Non-ASCII characters in the original string are not modified.
The function replaces characters in the string 's' in accordance with the one-to-one character mapping defined by the 'from' and 'to' strings. 'from' and 'to' must be constant ASCII strings of the same size. Non-ASCII characters in the original string are not modified.
Example:
@ -112,7 +112,7 @@ SELECT translate('Hello, World!', 'delor', 'DELOR') AS res
## translateUTF8(string, from, to)
Similar to previous function, but works with UTF-8 arguments. from and to must be valid UTF-8 strings of the same size.
Similar to the previous function, but works with UTF-8 arguments. 'from' and 'to' must be valid constant UTF-8 strings of the same size.
Example:

View File

@ -248,6 +248,13 @@ ClickHouse supports general purpose codecs and specialized codecs.
High compression levels are useful for asymmetric scenarios, like compress once, decompress repeatedly. Higher levels mean better compression and higher CPU usage.
#### DEFLATE_QPL
`DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by the Intel® Query Processing Library, which depends on Intel hardware:
- DEFLATE_QPL is only supported on systems with AVX2/AVX512/IAA.
- DEFLATE_QPL-compressed data can only be transferred between nodes with AVX2/AVX512/IAA.
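Assuming a ClickHouse build with QPL support enabled (see the `ENABLE_QPL` CMake option), usage could look roughly like this; the table and column names are invented, and depending on the version an additional setting for experimental codecs may be required:

```sql
-- Hypothetical table using the DEFLATE_QPL codec for one column.
CREATE TABLE qpl_demo
(
    id UInt64,
    payload String CODEC(DEFLATE_QPL)
)
ENGINE = MergeTree
ORDER BY id;
```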
### Specialized Codecs
These codecs are designed to make compression more effective by using specific features of data. Some of these codecs do not compress data themselves. Instead, they prepare the data for a common purpose codec, which compresses it better than without this preparation.

View File

@ -18,7 +18,6 @@ sidebar_label: "Используемые сторонние библиотеки
| aws-c-common | [Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) |
| aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) |
| aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) |
| base58 | [MIT](https://github.com/ClickHouse/base-x/blob/3e58874643c087f57e82b0ff03825c933fab945a/LICENSE) |
| base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) |
| boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) |
| boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) |

View File

@ -15,16 +15,15 @@ $ make
Генерация данных:
:::danger "Внимание"
:::warning "Внимание"
-s 100 dbgen генерирует 600 миллионов строк (67 ГБ)
-s 1000 dbgen генерирует 6 миллиардов строк (занимает много времени)
:::
``` bash
$ ./dbgen -s 1000 -T c
$ ./dbgen -s 1000 -T l
$ ./dbgen -s 1000 -T p
$ ./dbgen -s 1000 -T s
$ ./dbgen -s 1000 -T d
```
Создание таблиц в Кликхауз:
@ -105,11 +104,10 @@ $ clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
``` sql
SET max_memory_usage = 20000000000;
CREATE TABLE lineorder_flat
ENGINE = MergeTree
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
ENGINE = MergeTree ORDER BY (LO_ORDERDATE, LO_ORDERKEY)
AS SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,

View File

@ -19,6 +19,7 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
| [CSV](#csv) | ✔ | ✔ |
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
@ -52,6 +53,7 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✔ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNames](#rowbinarywithnames) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ |
@ -171,6 +173,12 @@ SELECT * FROM nestedt FORMAT TSV
При парсинге первая строка должна содержать имена столбцов. Вы можете использовать имена столбцов, чтобы указать их порядок расположения, или чтобы проверить их корректность.
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
:::
Этот формат также доступен под именем `TSVWithNames`.
## TabSeparatedWithNamesAndTypes {#tabseparatedwithnamesandtypes}
@ -178,6 +186,14 @@ SELECT * FROM nestedt FORMAT TSV
Отличается от формата `TabSeparated` тем, что в первой строке пишутся имена столбцов, а во второй - типы столбцов.
При парсинге, первая и вторая строка полностью игнорируется.
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
Если включен параметр [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header),
типы из входных данных будут сравниваться с типами соответствующих столбцов из таблицы. В противном случае вторая строка будет пропущена.
:::
Этот формат также доступен под именем `TSVWithNamesAndTypes`.
## Template {#format-template}
@ -374,6 +390,24 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
Выводит также заголовок, аналогично [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
:::
## CSVWithNamesAndTypes {#csvwithnamesandtypes}
В первой строке пишутся имена столбцов, а во второй - типы столбцов, аналогично [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes)
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
Если включен параметр [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header),
типы из входных данных будут сравниваться с типами соответствующих столбцов из таблицы. В противном случае вторая строка будет пропущена.
:::
## CustomSeparated {#format-customseparated}
Аналогичен [Template](#format-template), но выводит (или считывает) все имена и типы столбцов, используя для них правило экранирования из настройки [format_custom_escaping_rule](../operations/settings/settings.md#format-custom-escaping-rule) и разделители из настроек [format_custom_field_delimiter](../operations/settings/settings.md#format-custom-field-delimiter), [format_custom_row_before_delimiter](../operations/settings/settings.md#format-custom-row-before-delimiter), [format_custom_row_after_delimiter](../operations/settings/settings.md#format-custom-row-after-delimiter), [format_custom_row_between_delimiter](../operations/settings/settings.md#format-custom-row-between-delimiter), [format_custom_result_before_delimiter](../operations/settings/settings.md#format-custom-result-before-delimiter) и [format_custom_result_after_delimiter](../operations/settings/settings.md#format-custom-result-after-delimiter), а не из форматных строк.
@ -384,10 +418,24 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
Выводит также заголовок с именами столбцов, аналогичен формату [TabSeparatedWithNames](#tabseparatedwithnames).
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
:::
## CustomSeparatedWithNamesAndTypes {#customseparatedwithnamesandtypes}
Выводит также два заголовка с именами и типами столбцов, аналогичен формату [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
:::warning
Если включен параметр [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header),
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
Если включен параметр [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header),
типы из входных данных будут сравниваться с типами соответствующих столбцов из таблицы. В противном случае вторая строка будет пропущена.
:::
## JSON {#json}
Выводит данные в формате JSON. Кроме таблицы с данными, также выводятся имена и типы столбцов, и некоторая дополнительная информация - общее количество выведенных строк, а также количество строк, которое могло бы быть выведено, если бы не было LIMIT-а. Пример:
@ -660,6 +708,14 @@ SELECT * FROM json_square_brackets;
Differs from `JSONCompactEachRow`/`JSONCompactStringsEachRow` in that the column names and types are written as the first two rows.
:::warning
If the [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) setting is enabled,
the columns from the input data are mapped to the table columns by their names; columns with unknown names are skipped if the [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) setting is enabled.
Otherwise, the first row is skipped.
If the [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) setting is enabled,
the types from the input data are compared with the types of the corresponding columns of the table. Otherwise, the second row is skipped.
:::
```json
["'hello'", "multiply(42, number)", "range(5)"]
["String", "UInt64", "Array(UInt8)"]
@ -904,6 +960,20 @@ Array представлены как длина в формате varint (unsig
To support [NULL](../sql-reference/syntax.md#null-literal), each value of a [Nullable](../sql-reference/data-types/nullable.md) type is preceded by a byte containing 1 or 0. If the byte is 1, the value is NULL and this byte is interpreted as a separate value (that is, it is followed by the value of the next field). If the byte is 0, the field value (which is not NULL) follows the byte.
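The flag byte is the usual stumbling block when producing RowBinary by hand, so here is a minimal C++ sketch of emitting a single Nullable(Int32) value in this encoding. It is an illustration only: the `write_nullable_int32` helper and the use of `std::ofstream` are assumptions for the example, not ClickHouse APIs, and a little-endian host is assumed.

```cpp
#include <cstdint>
#include <fstream>
#include <optional>

// Illustrative helper (not a ClickHouse API): write one Nullable(Int32) value
// in RowBinary encoding: a flag byte (1 = NULL, 0 = value follows),
// then the 4-byte little-endian value when the flag byte is 0.
void write_nullable_int32(std::ofstream & out, std::optional<int32_t> value)
{
    const uint8_t is_null = value.has_value() ? 0 : 1;
    out.write(reinterpret_cast<const char *>(&is_null), 1);
    if (value)
    {
        int32_t v = *value;  // assumes a little-endian host, matching RowBinary's byte order
        out.write(reinterpret_cast<const char *>(&v), sizeof(v));
    }
}

int main()
{
    std::ofstream out("row.bin", std::ios::binary);
    write_nullable_int32(out, 42);            // emits 0x00 0x2A 0x00 0x00 0x00
    write_nullable_int32(out, std::nullopt);  // emits 0x01
    return 0;
}
```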
## RowBinaryWithNames {#rowbinarywithnames}
Same as [RowBinary](#rowbinary), but with an added header:
- the number of columns N, encoded as [LEB128](https://en.wikipedia.org/wiki/LEB128),
- N strings (`String`) with column names.
:::warning
If the [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) setting is enabled,
the columns from the input data are mapped to the table columns by their names; columns with unknown names are skipped if the [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) setting is enabled.
Otherwise, the first row is skipped.
:::
## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes}
Same as [RowBinary](#rowbinary), but with an added header:
@ -912,6 +982,14 @@ Array представлены как длина в формате varint (unsig
- N strings (`String`) with column names,
- N strings (`String`) with column types (a minimal reading sketch is shown below).
:::warning
If the [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) setting is enabled,
the columns from the input data are mapped to the table columns by their names; columns with unknown names are skipped if the [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) setting is enabled.
Otherwise, the first row is skipped.
If the [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) setting is enabled,
the types from the input data are compared with the types of the corresponding columns of the table. Otherwise, the second row is skipped.
:::
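To make the header layout concrete (a LEB128-encoded column count, then N names, then N types, each written as a RowBinary `String`, i.e. a LEB128 length followed by the bytes), here is a hedged sketch of a standalone header reader. It is not ClickHouse code; the helper names and the input file name are assumptions, and a well-formed input file is assumed.

```cpp
#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

// Illustrative LEB128 reader (RowBinary uses LEB128 for lengths and counts).
static uint64_t readLEB128(std::istream & in)
{
    uint64_t result = 0;
    int shift = 0;
    while (true)
    {
        const int byte = in.get();
        result |= static_cast<uint64_t>(byte & 0x7F) << shift;
        if (!(byte & 0x80))
            break;
        shift += 7;
    }
    return result;
}

// A RowBinary String is a LEB128 length followed by that many bytes.
static std::string readString(std::istream & in)
{
    const uint64_t len = readLEB128(in);
    std::string s(len, '\0');
    in.read(s.data(), static_cast<std::streamsize>(len));
    return s;
}

int main()
{
    std::ifstream in("data.rowbinary_with_names_and_types", std::ios::binary);
    const uint64_t columns = readLEB128(in);
    std::vector<std::string> names(columns), types(columns);
    for (auto & name : names) name = readString(in);  // N column names
    for (auto & type : types) type = readString(in);  // N column types
    for (uint64_t i = 0; i < columns; ++i)
        std::cout << names[i] << " : " << types[i] << '\n';
    // The row data in plain RowBinary encoding follows from here.
    return 0;
}
```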
## Values {#data-format-values}
Prints every row in parentheses. Rows are separated by commas, with no comma after the last row. Values inside the parentheses are also separated by commas. Numbers are output in decimal form without quotes. Arrays are output in square brackets. Strings, dates and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting no extra spaces are inserted; during parsing they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../sql-reference/syntax.md) is represented as `NULL`.
View File
@ -44,7 +44,7 @@ ClickHouse перезагружает встроенные словари с з
- `min_part_size` - the minimum size of a data part of a table.
- `min_part_size_ratio` - the ratio of the minimum part size to the full size of the table.
- `method` - the compression method. Possible values: `lz4`, `lz4hc`, `zstd`.
- `method` - the compression method. Possible values: `lz4`, `lz4hc`, `zstd`, `deflate_qpl`.
- `level` - the compression level. See [Codecs](../../sql-reference/statements/create/table/#create-query-common-purpose-codecs).
Several `<case>` sections can be configured.
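For reference, the same codec names and level can also be requested programmatically inside ClickHouse through `CompressionCodecFactory`, the pattern this commit uses in the obfuscator (`CompressionCodecFactory::instance().get("ZSTD", 1)`). The sketch below is a hedged illustration of that pattern and assumes it is compiled as part of the ClickHouse source tree; `DEFLATE_QPL` is only registered when the server is built with QPL support.

```cpp
#include <string>

#include <Compression/CompressionFactory.h>
#include <Compression/CompressedWriteBuffer.h>
#include <IO/WriteBufferFromFile.h>

using namespace DB;

// Sketch: pick a codec by name and level (the same methods the <compression>
// config section describes) and wrap a file in a compressed output stream.
void writeCompressed(const std::string & path, const char * data, size_t size)
{
    auto codec = CompressionCodecFactory::instance().get("ZSTD", 1);  // method + level
    WriteBufferFromFile file_out(path);
    CompressedWriteBuffer out(file_out, codec);
    out.write(data, size);
    out.finalize();
    file_out.finalize();
}
```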
View File
@ -527,7 +527,7 @@ SELECT * FROM table_with_enum_column_for_tsv_insert;
- [Usage of nested structures](../../interfaces/formats.md#jsoneachrow-nested) with the `JSONEachRow` format.
## input_format_with_names_use_header {#settings-input-format-with-names-use-header}
## input_format_with_names_use_header {#input_format_with_names_use_header}
Enables or disables checking the column order when inserting data.
@ -535,8 +535,38 @@ SELECT * FROM table_with_enum_column_for_tsv_insert;
Supported formats:
- [CSVWithNames](../../interfaces/formats.md#csvwithnames)
- [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames)
- [CSVWithNames](../../interfaces/formats.md#csvwithnames)
- [CSVWithNamesAndTypes](../../interfaces/formats.md#csvwithnamesandtypes)
- [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames)
- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md#tabseparatedwithnamesandtypes)
- [JSONCompactEachRowWithNames](../../interfaces/formats.md#jsoncompacteachrowwithnames)
- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompacteachrowwithnamesandtypes)
- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md#jsoncompactstringseachrowwithnames)
- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompactstringseachrowwithnamesandtypes)
- [RowBinaryWithNames](../../interfaces/formats.md#rowbinarywithnames)
- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes)
- [CustomSeparatedWithNames](../../interfaces/formats.md#customseparatedwithnames)
- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md#customseparatedwithnamesandtypes)
Possible values:
- 0 — disabled.
- 1 — enabled.
Default value: 1.
## input_format_with_types_use_header {#input_format_with_types_use_header}
Determines whether the format parser should check that the data types of the input data match the data types of the target table.
Supported formats:
- [CSVWithNamesAndTypes](../../interfaces/formats.md#csvwithnamesandtypes)
- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md#tabseparatedwithnamesandtypes)
- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompacteachrowwithnamesandtypes)
- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompactstringseachrowwithnamesandtypes)
- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes-rowbinarywithnamesandtypes)
- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md#customseparatedwithnamesandtypes)
Possible values:
@ -626,8 +656,9 @@ ClickHouse может парсить только базовый формат `Y
Changes the behaviour of operations executed with `ANY` strictness.
:::danger "Attention"
:::warning "Attention"
This setting applies only to `JOIN` operations over tables with the [Join](../../engines/table-engines/special/join.md) engine.
:::
Possible values:
@ -2082,8 +2113,9 @@ SELECT * FROM test_table
Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for the threads that execute queries. The OS scheduler takes this priority into account when choosing the next thread to run on an available CPU core.
:::danger "Warning"
:::warning "Warning"
To use this setting, the `CAP_SYS_NICE` capability has to be set. The `clickhouse-server` package sets it up during installation. Some virtual environments do not allow you to set `CAP_SYS_NICE`; in that case `clickhouse-server` prints a message about it at startup.
:::
Allowed values:
View File
@ -5,7 +5,7 @@ sidebar_label: AggregateFunction
# AggregateFunction {#data-type-aggregatefunction}
Aggregate functions can have an implementation-defined intermediate state that can be serialized into a data type corresponding to AggregateFunction(…) and written to a table, usually by means of a [materialized view](../../sql-reference/statements/create.md#create-view). To get an intermediate state, aggregate functions with the `-State` suffix are normally used. To get the final aggregated data later, the same aggregate functions with the `-Merge` suffix must be used.
Aggregate functions can have an implementation-defined intermediate state that can be serialized into a data type corresponding to AggregateFunction(…) and written to a table, usually by means of a [materialized view](../../sql-reference/statements/create/view.md). To get an intermediate state, aggregate functions with the `-State` suffix are normally used. To get the final aggregated data later, the same aggregate functions with the `-Merge` suffix must be used.
`AggregateFunction(name, types_of_arguments…)` is a parametric data type.
@ -63,5 +63,4 @@ SELECT uniqMerge(state) FROM (SELECT uniqState(UserID) AS state FROM table GROUP
## Usage example {#primer-ispolzovaniia}
See the description of the [AggregatingMergeTree](../../sql-reference/data-types/aggregatefunction.md) engine.
See the description of the [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) engine.
View File
@ -490,22 +490,21 @@ SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY (key1, key2);
Returns the string s converted from the encoding from to the encoding to.
## Base58Encode(plaintext[, alphabet_name]), Base58Decode(plaintext[, alphabet_name]) {#base58}
## Base58Encode(plaintext), Base58Decode(encoded_text) {#base58}
Accepts a string or a column of strings and encodes/decodes it using the [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme with the specified alphabet.
Accepts a string or a column of strings and encodes/decodes it using the [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme with the standard Bitcoin alphabet.
**Syntax**
```sql
base58Encode(decoded[, alphabet_name])
base58Decode(encoded[, alphabet_name])
encodeBase58(decoded)
decodeBase58(encoded)
```
**Arguments**
- `decoded` — a column or a string of type [String](../../sql-reference/data-types/string.md).
- `encoded` — a column or a string of type [String](../../sql-reference/data-types/string.md). If the input string is not a valid Base58 encoding of some string, an exception with code `1001` is thrown.
- `alphabet_name` — a string constant specifying the alphabet to use for encoding. Possible values: `gmp`, `bitcoin`, `ripple`, `flickr`. Default: `bitcoin`.
**Returned value**
@ -518,16 +517,16 @@ base58Decode(encoded[, alphabet_name])
Query:
``` sql
SELECT base58Encode('encode', 'flickr');
SELECT base58Decode('izCFiDUY', 'ripple');
SELECT encodeBase58('encode');
SELECT decodeBase58('izCFiDUY');
```
Result:
```text
┌─base58Encode('encode', 'flickr')─┐
┌─encodeBase58('encode')─┐
│ SvyTHb1D │
└──────────────────────────────────┘
┌─base58Decode('izCFiDUY', 'ripple')─┐
┌─decodeBase58('izCFiDUY')─┐
│ decode │
└────────────────────────────────────┘
```
View File
@ -85,7 +85,7 @@ SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res
## translate(s, from, to)
This function replaces characters in the string s according to the one-to-one character mapping defined by the strings from and to. from and to must be valid ASCII strings of the same length. Non-ASCII characters in the original string are not changed.
This function replaces characters in the string s according to the one-to-one character mapping defined by the strings from and to. from and to must be valid constant ASCII strings of the same length. Non-ASCII characters in the original string are not changed.
Example:
@ -101,7 +101,7 @@ SELECT translate('Hello, World!', 'delor', 'DELOR') AS res
## translateUTF8(string, from, to)
Same as the previous function, but works with strings consisting of UTF-8 characters. from and to must be valid UTF-8 strings of the same length.
Same as the previous function, but works with strings consisting of UTF-8 characters. from and to must be valid constant UTF-8 strings of the same length.
Example:
View File
@ -15,15 +15,14 @@ $ make
Start generating the data:
!!! warning "Note"
:::warning "Note"
With `-s 100`, dbgen generates 600 million rows (67 GB); with `-s 1000` it generates 6 billion rows (which takes a long time).
:::
```bash
$ ./dbgen -s 1000 -T c
$ ./dbgen -s 1000 -T l
$ ./dbgen -s 1000 -T p
$ ./dbgen -s 1000 -T s
$ ./dbgen -s 1000 -T d
```
Create the tables in ClickHouse:
@ -106,10 +105,8 @@ $ clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
SET max_memory_usage = 20000000000;
CREATE TABLE lineorder_flat
ENGINE = MergeTree
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
ENGINE = MergeTree ORDER BY (LO_ORDERDATE, LO_ORDERKEY)
AS SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,
View File
@ -18,7 +18,7 @@ option (ENABLE_CLICKHOUSE_SERVER "Server mode (main mode)" ${ENABLE_CLICKHOUSE_A
option (ENABLE_CLICKHOUSE_CLIENT "Client mode (interactive tui/shell that connects to the server)"
${ENABLE_CLICKHOUSE_ALL})
if (CLICKHOUSE_SPLIT_BINARY OR NOT ENABLE_UTILS)
if (CLICKHOUSE_SPLIT_BINARY)
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" OFF)
else ()
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" ON)
@ -434,6 +434,9 @@ else ()
endif ()
set (CLICKHOUSE_BUNDLE)
if (ENABLE_CLICKHOUSE_SELF_EXTRACTING)
list(APPEND CLICKHOUSE_BUNDLE self-extracting)
endif ()
if (ENABLE_CLICKHOUSE_SERVER)
add_custom_target (clickhouse-server ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-server DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-server" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
View File
@ -79,6 +79,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
("block-size,b", po::value<unsigned>()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size")
("hc", "use LZ4HC instead of LZ4")
("zstd", "use ZSTD instead of LZ4")
("deflate_qpl", "use deflate_qpl instead of LZ4")
("codec", po::value<std::vector<std::string>>()->multitoken(), "use codecs combination instead of LZ4")
("level", po::value<int>(), "compression level for codecs specified via flags")
("none", "use no compression instead of LZ4")
@ -103,6 +104,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
bool decompress = options.count("decompress");
bool use_lz4hc = options.count("hc");
bool use_zstd = options.count("zstd");
bool use_deflate_qpl = options.count("deflate_qpl");
bool stat_mode = options.count("stat");
bool use_none = options.count("none");
unsigned block_size = options["block-size"].as<unsigned>();
@ -110,7 +112,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
if (options.count("codec"))
codecs = options["codec"].as<std::vector<std::string>>();
if ((use_lz4hc || use_zstd || use_none) && !codecs.empty())
if ((use_lz4hc || use_zstd || use_deflate_qpl || use_none) && !codecs.empty())
throw Exception("Wrong options, codec flags like --zstd and --codec options are mutually exclusive", ErrorCodes::BAD_ARGUMENTS);
if (!codecs.empty() && options.count("level"))
@ -122,6 +124,8 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
method_family = "LZ4HC";
else if (use_zstd)
method_family = "ZSTD";
else if (use_deflate_qpl)
method_family = "DEFLATE_QPL";
else if (use_none)
method_family = "NONE";
View File
@ -34,6 +34,10 @@
#include <base/bit_cast.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <memory>
#include <cmath>
#include <unistd.h>
@ -95,6 +99,9 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_SEEK_THROUGH_FILE;
extern const int UNKNOWN_FORMAT_VERSION;
extern const int INCORRECT_NUMBER_OF_COLUMNS;
extern const int TYPE_MISMATCH;
}
@ -115,6 +122,12 @@ public:
/// Deterministically change seed to some other value. This can be used to generate more values than were in source.
virtual void updateSeed() = 0;
/// Save into file. Binary, platform-dependent, version-dependent serialization.
virtual void serialize(WriteBuffer & out) const = 0;
/// Read from file
virtual void deserialize(ReadBuffer & in) = 0;
virtual ~IModel() = default;
};
@ -189,6 +202,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -230,6 +245,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -279,6 +296,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -311,6 +330,8 @@ class IdentityModel : public IModel
public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -395,6 +416,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -431,6 +454,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -469,6 +494,8 @@ public:
void train(const IColumn &) override {}
void finalize() override {}
void serialize(WriteBuffer &) const override {}
void deserialize(ReadBuffer &) override {}
ColumnPtr generate(const IColumn & column) override
{
@ -512,6 +539,26 @@ struct MarkovModelParameters
size_t frequency_add;
double frequency_desaturate;
size_t determinator_sliding_window_size;
void serialize(WriteBuffer & out) const
{
writeBinary(order, out);
writeBinary(frequency_cutoff, out);
writeBinary(num_buckets_cutoff, out);
writeBinary(frequency_add, out);
writeBinary(frequency_desaturate, out);
writeBinary(determinator_sliding_window_size, out);
}
void deserialize(ReadBuffer & in)
{
readBinary(order, in);
readBinary(frequency_cutoff, in);
readBinary(num_buckets_cutoff, in);
readBinary(frequency_add, in);
readBinary(frequency_desaturate, in);
readBinary(determinator_sliding_window_size, in);
}
};
@ -565,6 +612,39 @@ private:
return END;
}
void serialize(WriteBuffer & out) const
{
writeBinary(total, out);
writeBinary(count_end, out);
size_t size = buckets.size();
writeBinary(size, out);
for (const auto & elem : buckets)
{
writeBinary(elem.first, out);
writeBinary(elem.second, out);
}
}
void deserialize(ReadBuffer & in)
{
readBinary(total, in);
readBinary(count_end, in);
size_t size = 0;
readBinary(size, in);
buckets.reserve(size);
for (size_t i = 0; i < size; ++i)
{
Buckets::value_type elem;
readBinary(elem.first, in);
readBinary(elem.second, in);
buckets.emplace(std::move(elem));
}
}
};
using Table = HashMap<NGramHash, Histogram, TrivialHash>;
@ -621,6 +701,37 @@ public:
explicit MarkovModel(MarkovModelParameters params_)
: params(std::move(params_)), code_points(params.order, BEGIN) {}
void serialize(WriteBuffer & out) const
{
params.serialize(out);
size_t size = table.size();
writeBinary(size, out);
for (const auto & elem : table)
{
writeBinary(elem.getKey(), out);
elem.getMapped().serialize(out);
}
}
void deserialize(ReadBuffer & in)
{
params.deserialize(in);
size_t size = 0;
readBinary(size, in);
table.reserve(size);
for (size_t i = 0; i < size; ++i)
{
NGramHash key{};
readBinary(key, in);
Histogram & histogram = table[key];
histogram.deserialize(in);
}
}
void consume(const char * data, size_t size)
{
/// First 'order' number of code points are pre-filled with BEGIN.
@ -655,7 +766,6 @@ public:
}
}
void finalize()
{
if (params.num_buckets_cutoff)
@ -878,6 +988,16 @@ public:
{
seed = hash(seed);
}
void serialize(WriteBuffer & out) const override
{
markov_model.serialize(out);
}
void deserialize(ReadBuffer & in) override
{
markov_model.deserialize(in);
}
};
@ -916,6 +1036,16 @@ public:
{
nested_model->updateSeed();
}
void serialize(WriteBuffer & out) const override
{
nested_model->serialize(out);
}
void deserialize(ReadBuffer & in) override
{
nested_model->deserialize(in);
}
};
@ -954,6 +1084,16 @@ public:
{
nested_model->updateSeed();
}
void serialize(WriteBuffer & out) const override
{
nested_model->serialize(out);
}
void deserialize(ReadBuffer & in) override
{
nested_model->deserialize(in);
}
};
@ -1046,6 +1186,18 @@ public:
for (auto & model : models)
model->updateSeed();
}
void serialize(WriteBuffer & out) const
{
for (const auto & model : models)
model->serialize(out);
}
void deserialize(ReadBuffer & in)
{
for (auto & model : models)
model->deserialize(in);
}
};
}
@ -1068,8 +1220,10 @@ try
("input-format", po::value<std::string>(), "input format of the initial table data")
("output-format", po::value<std::string>(), "default output format")
("seed", po::value<std::string>(), "seed (arbitrary string), must be random string with at least 10 bytes length; note that a seed for each column is derived from this seed and a column name: you can obfuscate data for different tables and as long as you use identical seed and identical column names, the data for corresponding non-text columns for different tables will be transformed in the same way, so the data for different tables can be JOINed after obfuscation")
("limit", po::value<UInt64>(), "if specified - stop after generating that number of rows")
("limit", po::value<UInt64>(), "if specified - stop after generating that number of rows; the limit can be also greater than the number of source dataset - in this case it will process the dataset in a loop more than one time, using different seeds on every iteration, generating result as large as needed")
("silent", po::value<bool>()->default_value(false), "don't print information messages to stderr")
("save", po::value<std::string>(), "save the models after training to the specified file. You can use --limit 0 to skip the generation step. The file is using binary, platform-dependent, opaque serialization format. The model parameters are saved, while the seed is not.")
("load", po::value<std::string>(), "load the models instead of training from the specified file. The table structure must match the saved file. The seed should be specified separately, while other model parameters are loaded.")
("order", po::value<UInt64>()->default_value(5), "order of markov model to generate strings")
("frequency-cutoff", po::value<UInt64>()->default_value(5), "frequency cutoff for markov model: remove all buckets with count less than specified")
("num-buckets-cutoff", po::value<UInt64>()->default_value(0), "cutoff for number of different possible continuations for a context: remove all histograms with less than specified number of buckets")
@ -1096,12 +1250,26 @@ try
return 0;
}
if (options.count("save") && options.count("load"))
{
std::cerr << "The options --save and --load cannot be used together.\n";
return 1;
}
UInt64 seed = sipHash64(options["seed"].as<std::string>());
std::string structure = options["structure"].as<std::string>();
std::string input_format = options["input-format"].as<std::string>();
std::string output_format = options["output-format"].as<std::string>();
std::string load_from_file;
std::string save_into_file;
if (options.count("load"))
load_from_file = options["load"].as<std::string>();
else if (options.count("save"))
save_into_file = options["save"].as<std::string>();
UInt64 limit = 0;
if (options.count("limit"))
limit = options["limit"].as<UInt64>();
@ -1117,7 +1285,7 @@ try
markov_model_params.frequency_desaturate = options["frequency-desaturate"].as<double>();
markov_model_params.determinator_sliding_window_size = options["determinator-sliding-window-size"].as<UInt64>();
// Create header block
/// Create the header block
std::vector<std::string> structure_vals;
boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on);
@ -1143,6 +1311,7 @@ try
ReadBufferFromFileDescriptor file_in(STDIN_FILENO);
WriteBufferFromFileDescriptor file_out(STDOUT_FILENO);
if (load_from_file.empty())
{
/// stdin must be seekable
auto res = lseek(file_in.getFD(), 0, SEEK_SET);
@ -1156,6 +1325,9 @@ try
/// Train step
UInt64 source_rows = 0;
bool rewind_needed = false;
if (load_from_file.empty())
{
if (!silent)
std::cerr << "Training models\n";
@ -1173,11 +1345,71 @@ try
if (!silent)
std::cerr << "Processed " << source_rows << " rows\n";
}
obfuscator.finalize();
rewind_needed = true;
}
else
{
if (!silent)
std::cerr << "Loading models\n";
ReadBufferFromFile model_file_in(load_from_file);
CompressedReadBuffer model_in(model_file_in);
UInt8 version = 0;
readBinary(version, model_in);
if (version != 0)
throw Exception("Unknown version of the model file", ErrorCodes::UNKNOWN_FORMAT_VERSION);
readBinary(source_rows, model_in);
Names data_types = header.getDataTypeNames();
size_t header_size = 0;
readBinary(header_size, model_in);
if (header_size != data_types.size())
throw Exception("The saved model was created for different number of columns", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS);
for (size_t i = 0; i < header_size; ++i)
{
String type;
readBinary(type, model_in);
if (type != data_types[i])
throw Exception("The saved model was created for different types of columns", ErrorCodes::TYPE_MISMATCH);
}
obfuscator.deserialize(model_in);
}
obfuscator.finalize();
if (!save_into_file.empty())
{
if (!silent)
std::cerr << "Saving models\n";
if (!limit)
WriteBufferFromFile model_file_out(save_into_file);
CompressedWriteBuffer model_out(model_file_out, CompressionCodecFactory::instance().get("ZSTD", 1));
/// You can change version on format change, it is currently set to zero.
UInt8 version = 0;
writeBinary(version, model_out);
writeBinary(source_rows, model_out);
/// We are writing the data types for validation, because the models serialization depends on the data types.
Names data_types = header.getDataTypeNames();
size_t header_size = data_types.size();
writeBinary(header_size, model_out);
for (const auto & type : data_types)
writeBinary(type, model_out);
/// Write the models.
obfuscator.serialize(model_out);
model_out.finalize();
model_file_out.finalize();
}
if (!options.count("limit"))
limit = source_rows;
/// Generation step
@ -1187,7 +1419,8 @@ try
if (!silent)
std::cerr << "Generating data\n";
file_in.seek(0, SEEK_SET);
if (rewind_needed)
file_in.rewind();
Pipe pipe(context->getInputFormat(input_format, file_in, header, max_block_size));
@ -1220,6 +1453,7 @@ try
out_executor.finish();
obfuscator.updateSeed();
rewind_needed = true;
}
return 0;
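To make the model file layout written by the `--save` branch above easier to inspect, here is a hedged sketch of a standalone header dumper that mirrors the `--load` path: it reads the version byte, the source row count and the list of column data types. Only IO primitives that already appear in this diff are used; the program itself is an illustration (not part of the tool) and assumes it is built against the ClickHouse source tree.

```cpp
#include <iostream>
#include <string>

#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <Compression/CompressedReadBuffer.h>

using namespace DB;

// Sketch: dump the header of a model file produced by `clickhouse-obfuscator --save ...`.
// Layout, as written above: UInt8 version, UInt64 source_rows, size_t column count,
// then one String with the data type name per column, followed by the serialized models.
int main(int argc, char ** argv)
{
    if (argc < 2)
        return 1;

    ReadBufferFromFile file_in(argv[1]);
    CompressedReadBuffer in(file_in);

    UInt8 version = 0;
    readBinary(version, in);

    UInt64 source_rows = 0;
    readBinary(source_rows, in);

    size_t columns = 0;
    readBinary(columns, in);

    std::cout << "version: " << static_cast<int>(version)
              << ", source rows: " << source_rows
              << ", columns: " << columns << '\n';

    for (size_t i = 0; i < columns; ++i)
    {
        String type;
        readBinary(type, in);
        std::cout << "  column " << i << ": " << type << '\n';
    }
    return 0;
}
```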
View File
@ -1,6 +1,18 @@
if (NOT(
CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME
AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR
)
)
set (COMPRESSOR "${CMAKE_BINARY_DIR}/native/utils/self-extracting-executable/pre_compressor")
set (DECOMPRESSOR "--decompressor=${CMAKE_BINARY_DIR}/utils/self-extracting-executable/decompressor")
else ()
set (COMPRESSOR "${CMAKE_BINARY_DIR}/utils/self-extracting-executable/compressor")
endif ()
add_custom_target (self-extracting ALL
${CMAKE_COMMAND} -E remove clickhouse
COMMAND ${CMAKE_BINARY_DIR}/utils/self-extracting-executable/compressor clickhouse ../clickhouse
COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse ../clickhouse
DEPENDS clickhouse compressor
)
View File
@ -509,7 +509,6 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
if (referrals)
{
SCOPE_EXIT({
// ldap_value_free(referrals);
ber_memvfree(reinterpret_cast<void **>(referrals));
referrals = nullptr;
});
View File
@ -346,6 +346,12 @@ set_source_files_properties(
Columns/ColumnString.cpp
PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}")
if (ENABLE_QPL)
set_source_files_properties(
Compression/CompressionCodecDeflateQpl.cpp
PROPERTIES COMPILE_FLAGS "-mwaitpkg")
endif ()
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2_st)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2)
@ -530,6 +536,10 @@ endif ()
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4)
if (TARGET ch_contrib::qpl)
dbms_target_link_libraries(PUBLIC ch_contrib::qpl)
endif ()
dbms_target_link_libraries(PRIVATE _boost_context)
if (ENABLE_NLP)
View File
@ -152,7 +152,6 @@ static void incrementProfileEventsBlock(Block & dst, const Block & src)
auto & dst_column_host_name = typeid_cast<ColumnString &>(*mutable_columns[name_pos["host_name"]]);
auto & dst_array_current_time = typeid_cast<ColumnUInt32 &>(*mutable_columns[name_pos["current_time"]]).getData();
// auto & dst_array_thread_id = typeid_cast<ColumnUInt64 &>(*mutable_columns[name_pos["thread_id"]]).getData();
auto & dst_array_type = typeid_cast<ColumnInt8 &>(*mutable_columns[name_pos["type"]]).getData();
auto & dst_column_name = typeid_cast<ColumnString &>(*mutable_columns[name_pos["name"]]);
auto & dst_array_value = typeid_cast<ColumnInt64 &>(*mutable_columns[name_pos["value"]]).getData();
View File
@ -329,9 +329,9 @@ void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
case 0:
{
const auto r = fuzz_rand() % 3;
def.frame_type = r == 0 ? WindowFrame::FrameType::Rows
: r == 1 ? WindowFrame::FrameType::Range
: WindowFrame::FrameType::Groups;
def.frame_type = r == 0 ? WindowFrame::FrameType::ROWS
: r == 1 ? WindowFrame::FrameType::RANGE
: WindowFrame::FrameType::GROUPS;
break;
}
case 1:
@ -385,7 +385,7 @@ void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
break;
}
if (def.frame_type == WindowFrame::FrameType::Range
if (def.frame_type == WindowFrame::FrameType::RANGE
&& def.frame_begin_type == WindowFrame::BoundaryType::Unbounded
&& def.frame_begin_preceding
&& def.frame_end_type == WindowFrame::BoundaryType::Current)
View File
@ -132,14 +132,12 @@ namespace
ColumnLowCardinality::ColumnLowCardinality(MutableColumnPtr && column_unique_, MutableColumnPtr && indexes_, bool is_shared)
: dictionary(std::move(column_unique_), is_shared), idx(std::move(indexes_))
{
// idx.check(getDictionary().size());
}
void ColumnLowCardinality::insert(const Field & x)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsert(x));
// idx.check(getDictionary().size());
}
void ColumnLowCardinality::insertDefault()
@ -167,15 +165,12 @@ void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n)
const auto & nested = *low_cardinality_src->getDictionary().getNestedColumn();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(nested, position));
}
// idx.check(getDictionary().size());
}
void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(src, n));
// idx.check(getDictionary().size());
}
void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -205,7 +200,6 @@ void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, si
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(*used_keys, 0, used_keys->size());
idx.insertPositionsRange(*inserted_indexes->index(*sub_idx, 0), 0, length);
}
// idx.check(getDictionary().size());
}
void ColumnLowCardinality::insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length)
@ -213,7 +207,6 @@ void ColumnLowCardinality::insertRangeFromFullColumn(const IColumn & src, size_t
compactIfSharedDictionary();
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(src, start, length);
idx.insertPositionsRange(*inserted_indexes, 0, length);
// idx.check(getDictionary().size());
}
static void checkPositionsAreLimited(const IColumn & positions, UInt64 limit)
@ -254,14 +247,12 @@ void ColumnLowCardinality::insertRangeFromDictionaryEncodedColumn(const IColumn
compactIfSharedDictionary();
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(keys, 0, keys.size());
idx.insertPositionsRange(*inserted_indexes->index(positions, 0), 0, positions.size());
// idx.check(getDictionary().size());
}
void ColumnLowCardinality::insertData(const char * pos, size_t length)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertData(pos, length));
// idx.check(getDictionary().size());
}
StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
@ -276,7 +267,6 @@ const char * ColumnLowCardinality::deserializeAndInsertFromArena(const char * po
const char * new_pos;
idx.insertPosition(dictionary.getColumnUnique().uniqueDeserializeAndInsertFromArena(pos, new_pos));
// idx.check(getDictionary().size());
return new_pos;
}
View File
@ -273,14 +273,6 @@ llvm::Value * ColumnNullable::compileComparator(llvm::IRBuilderBase & builder, l
b.CreateCondBr(lhs_or_rhs_are_null, lhs_or_rhs_are_null_block, lhs_rhs_are_not_null_block);
// if (unlikely(lval_is_null || rval_is_null))
// {
// if (lval_is_null && rval_is_null)
// return 0;
// else
// return lval_is_null ? null_direction_hint : -null_direction_hint;
// }
b.SetInsertPoint(lhs_or_rhs_are_null_block);
auto * lhs_equals_rhs_result = llvm::ConstantInt::getSigned(b.getInt8Ty(), 0);
llvm::Value * lhs_and_rhs_are_null = b.CreateAnd(lhs_is_null_value, rhs_is_null_value);
@ -288,8 +280,6 @@ llvm::Value * ColumnNullable::compileComparator(llvm::IRBuilderBase & builder, l
llvm::Value * lhs_or_rhs_are_null_block_result = b.CreateSelect(lhs_and_rhs_are_null, lhs_equals_rhs_result, lhs_is_null_result);
b.CreateBr(join_block);
// getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
b.SetInsertPoint(lhs_rhs_are_not_null_block);
llvm::Value * lhs_rhs_are_not_null_block_result
= nested_column->compileComparator(builder, lhs_unwrapped_value, rhs_unwrapped_value, nan_direction_hint);
View File
@ -548,7 +548,6 @@ MutableColumnPtr ColumnUnique<ColumnType>::uniqueInsertRangeImpl(
}
}
// checkIndexes(*positions_column, column->size() + (overflowed_keys ? overflowed_keys->size() : 0));
return std::move(positions_column);
}
View File
@ -514,8 +514,6 @@ private:
return allocateFromFreeRegion(*free_region, size);
}
// std::cerr << "Requested size: " << size << "\n";
/// Evict something from cache and continue.
while (true)
{
View File
@ -73,7 +73,7 @@ bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const S
/// Check that the right configuration has the same set of subkeys as the left configuration.
Poco::Util::AbstractConfiguration::Keys right_subkeys;
right.keys(right_key, right_subkeys);
std::unordered_set<StringRef> left_subkeys{subkeys.begin(), subkeys.end()};
std::unordered_set<std::string_view> left_subkeys{subkeys.begin(), subkeys.end()};
if ((left_subkeys.size() != right_subkeys.size()) || (left_subkeys.size() != subkeys.size()))
return false;
for (const auto & right_subkey : right_subkeys)
View File
@ -122,9 +122,6 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
values.time_at_offset_change_value = (transition.from - cctz::civil_second(date)) / Values::OffsetChangeFactor;
values.amount_of_offset_change_value = (transition.to - transition.from) / Values::OffsetChangeFactor;
// std::cerr << time_zone << ", " << date << ": change from " << transition.from << " to " << transition.to << "\n";
// std::cerr << time_zone << ", " << date << ": change at " << values.time_at_offset_change() << " with " << values.amount_of_offset_change() << "\n";
/// We don't support too large changes.
if (values.amount_of_offset_change_value > 24 * 4)
values.amount_of_offset_change_value = 24 * 4;
View File
@ -74,7 +74,6 @@ public:
using key_type = Key;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type;
using cell_type = Cell;
class Reader final : private Cell::State
{
@ -247,39 +246,6 @@ public:
}
}
/// Same, but return false if it's full.
bool ALWAYS_INLINE tryEmplace(Key x, iterator & it, bool & inserted)
{
Cell * res = findCell(x);
it = iteratorTo(res);
inserted = res == buf + m_size;
if (inserted)
{
if (res == buf + capacity)
return false;
new(res) Cell(x, *this);
++m_size;
}
return true;
}
/// Copy the cell from another hash table. It is assumed that there was no such key in the table yet.
void ALWAYS_INLINE insertUnique(const Cell * cell)
{
memcpy(&buf[m_size], cell, sizeof(*cell));
++m_size;
}
void ALWAYS_INLINE insertUnique(Key x)
{
new(&buf[m_size]) Cell(x, *this);
++m_size;
}
iterator ALWAYS_INLINE find(Key x) { return iteratorTo(findCell(x)); }
const_iterator ALWAYS_INLINE find(Key x) const { return iteratorTo(findCell(x)); }
@ -381,36 +347,3 @@ template
>
using SmallSet = SmallTable<Key, HashTableCell<Key, HashUnused>, capacity>;
template
<
typename Key,
typename Cell,
size_t capacity
>
class SmallMapTable : public SmallTable<Key, Cell, capacity>
{
public:
using key_type = Key;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type;
using cell_type = Cell;
mapped_type & ALWAYS_INLINE operator[](Key x)
{
typename SmallMapTable::iterator it;
bool inserted;
this->emplace(x, it, inserted);
new (&it->getMapped()) mapped_type();
return it->getMapped();
}
};
template
<
typename Key,
typename Mapped,
size_t capacity
>
using SmallMap = SmallMapTable<Key, HashMapCell<Key, Mapped, HashUnused>, capacity>;
View File
@ -355,8 +355,6 @@ private:
template <size_t PASS>
static inline void radixSortMSDInternal(Element * arr, size_t size, size_t limit)
{
// std::cerr << PASS << ", " << size << ", " << limit << "\n";
/// The beginning of every i-1-th bucket. 0th element will be equal to 1st.
/// Last element will point to array end.
std::unique_ptr<Element *[]> prev_buckets{new Element*[HISTOGRAM_SIZE + 1]};
View File
@ -9,7 +9,6 @@
#include <Common/StringSearcher.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/UTF8Helpers.h>
#include <base/StringRef.h>
#include <base/unaligned.h>
/** Search for a substring in a string by Volnitsky's algorithm
View File
@ -99,6 +99,7 @@ try
std::cout << "list\n";
zk.list("/",
Coordination::ListRequestType::ALL,
[&](const ListResponse & response)
{
if (response.error != Coordination::Error::ZOK)
src/Common/base58.h Normal file
View File
@ -0,0 +1,87 @@
#pragma once
#include <climits>
#include <cstring>
namespace DB
{
inline size_t encodeBase58(const char8_t * src, char8_t * dst)
{
const char * base58_encoding_alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
size_t idx = 0;
for (; *src; ++src)
{
unsigned int carry = static_cast<unsigned char>(*src);
for (size_t j = 0; j < idx; ++j)
{
carry += static_cast<unsigned int>(dst[j] << 8);
dst[j] = static_cast<unsigned char>(carry % 58);
carry /= 58;
}
while (carry > 0)
{
dst[idx++] = static_cast<unsigned char>(carry % 58);
carry /= 58;
}
}
size_t c_idx = idx >> 1;
for (size_t i = 0; i < c_idx; ++i)
{
char s = base58_encoding_alphabet[static_cast<unsigned char>(dst[i])];
dst[i] = base58_encoding_alphabet[static_cast<unsigned char>(dst[idx - (i + 1)])];
dst[idx - (i + 1)] = s;
}
if ((idx & 1))
{
dst[c_idx] = base58_encoding_alphabet[static_cast<unsigned char>(dst[c_idx])];
}
dst[idx] = '\0';
return idx + 1;
}
inline size_t decodeBase58(const char8_t * src, char8_t * dst)
{
const signed char uint_max = UINT_MAX;
const signed char map_digits[128]
= {uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max,
uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max,
uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max,
uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, 0, 1, 2, 3, 4, 5, 6, 7, 8, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, 9, 10, 11, 12, 13, 14, 15, 16, uint_max, 17, 18, 19, 20, 21, uint_max, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
uint_max, uint_max, uint_max, uint_max, uint_max, uint_max, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, uint_max, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, uint_max, uint_max, uint_max, uint_max, uint_max};
size_t idx = 0;
for (; *src; ++src)
{
unsigned int carry = map_digits[*src];
if (unlikely(carry == UINT_MAX))
{
return 0;
}
for (size_t j = 0; j < idx; ++j)
{
carry += static_cast<unsigned char>(dst[j]) * 58;
dst[j] = static_cast<unsigned char>(carry & 0xff);
carry >>= 8;
}
while (carry > 0)
{
dst[idx++] = static_cast<unsigned char>(carry & 0xff);
carry >>= 8;
}
}
size_t c_idx = idx >> 1;
for (size_t i = 0; i < c_idx; ++i)
{
char s = dst[i];
dst[i] = dst[idx - (i + 1)];
dst[idx - (i + 1)] = s;
}
dst[idx] = '\0';
return idx + 1;
}
}
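A short usage sketch for the helpers above. The input must be a null-terminated buffer, the caller owns the output buffer (Base58 grows data by roughly a factor of 1.37, so `2 * len + 1` is a comfortably safe size), and both functions return the number of bytes written including the trailing `'\0'`; `decodeBase58` returns 0 on invalid input. The example program itself is an illustration, not code from the commit.

```cpp
#include <cstring>
#include <iostream>
#include <vector>

#include <Common/base58.h>

int main()
{
    const char * text = "encode";
    std::vector<char8_t> encoded(2 * strlen(text) + 1);  // generous upper bound for Base58 growth
    std::vector<char8_t> decoded(strlen(text) + 1);      // decoding never grows the data

    /// encodeBase58/decodeBase58 work on null-terminated buffers and
    /// return the number of bytes written including the trailing '\0'.
    size_t written = DB::encodeBase58(reinterpret_cast<const char8_t *>(text), encoded.data());
    std::cout << "encoded (" << written - 1 << " chars): "
              << reinterpret_cast<const char *>(encoded.data()) << '\n';

    size_t back = DB::decodeBase58(encoded.data(), decoded.data());
    if (back == 0)
        std::cout << "not a valid Base58 string\n";
    else
        std::cout << "decoded: " << reinterpret_cast<const char *>(decoded.data()) << '\n';
    return 0;
}
```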
View File
@ -1,5 +1,4 @@
#include <iostream>
#include <iomanip>
#include <Interpreters/AggregationCommon.h>
@ -33,22 +32,6 @@ int main(int, char **)
std::cerr << "dump: " << wb.str() << std::endl;
}
{
using Cont = SmallMap<int, std::string, 16>;
Cont cont;
cont.insert(Cont::value_type(1, "Hello, world!"));
cont[1] = "Goodbye.";
for (auto x : cont)
std::cerr << x.getKey() << " -> " << x.getMapped() << std::endl;
DB::WriteBufferFromOwnString wb;
cont.writeText(wb);
std::cerr << "dump: " << wb.str() << std::endl;
}
{
using Cont = SmallSet<DB::UInt128, 16>;
Cont cont;
View File
@ -0,0 +1,33 @@
#include <base/defines.h> // ADDRESS_SANITIZER
#ifdef ADDRESS_SANITIZER
#include <cstdlib>
#include <thread>
#include <gtest/gtest.h>
#include <sanitizer/lsan_interface.h>
/// Test that ensures that LSan works.
///
/// Regression test for the case when it may not work,
/// because of broken getauxval() [1].
///
/// [1]: https://github.com/ClickHouse/ClickHouse/pull/33957
TEST(Common, LSan)
{
int sanitizers_exit_code = 1;
ASSERT_EXIT({
std::thread leak_in_thread([]()
{
void * leak = malloc(4096);
ASSERT_NE(leak, nullptr);
});
leak_in_thread.join();
__lsan_do_leak_check();
}, ::testing::ExitedWithCode(sanitizers_exit_code), ".*LeakSanitizer: detected memory leaks.*");
}
#endif
View File
@ -92,6 +92,8 @@ static int pollPid(pid_t pid, int timeout_in_ms)
}
#elif defined(OS_DARWIN) || defined(OS_FREEBSD)
#pragma clang diagnostic ignored "-Wreserved-identifier"
#include <sys/event.h>
#include <err.h>
View File
@ -106,21 +106,15 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c
throw Exception(message.str(), ErrorCodes::CHECKSUM_DOESNT_MATCH);
}
/// Read compressed data into compressed_buffer. Get size of decompressed data from block header. Checksum if need.
/// Returns number of compressed bytes read.
size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum, bool always_copy)
static void readHeaderAndGetCodecAndSize(
const char * compressed_buffer,
UInt8 header_size,
CompressionCodecPtr & codec,
size_t & size_decompressed,
size_t & size_compressed_without_checksum,
bool allow_different_codecs)
{
if (compressed_in->eof())
return 0;
UInt8 header_size = ICompressionCodec::getHeaderSize();
own_compressed_buffer.resize(header_size + sizeof(Checksum));
compressed_in->readStrict(own_compressed_buffer.data(), sizeof(Checksum) + header_size);
char * compressed_header = own_compressed_buffer.data() + sizeof(Checksum);
uint8_t method = ICompressionCodec::readMethod(compressed_header);
uint8_t method = ICompressionCodec::readMethod(compressed_buffer);
if (!codec)
{
@ -142,8 +136,8 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
}
}
size_compressed_without_checksum = ICompressionCodec::readCompressedBlockSize(compressed_header);
size_decompressed = ICompressionCodec::readDecompressedBlockSize(compressed_header);
size_compressed_without_checksum = ICompressionCodec::readCompressedBlockSize(compressed_buffer);
size_decompressed = ICompressionCodec::readDecompressedBlockSize(compressed_buffer);
/// This is for clang static analyzer.
assert(size_decompressed > 0);
@ -157,8 +151,27 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
if (size_compressed_without_checksum < header_size)
throw Exception("Can't decompress data: the compressed data size (" + toString(size_compressed_without_checksum)
+ ", this should include header size) is less than the header size (" + toString(header_size) + ")", ErrorCodes::CORRUPTED_DATA);
}
ProfileEvents::increment(ProfileEvents::ReadCompressedBytes, size_compressed_without_checksum + sizeof(Checksum));
/// Read compressed data into compressed_buffer. Get size of decompressed data from block header. Checksum if need.
/// Returns number of compressed bytes read.
size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum, bool always_copy)
{
if (compressed_in->eof())
return 0;
UInt8 header_size = ICompressionCodec::getHeaderSize();
own_compressed_buffer.resize(header_size + sizeof(Checksum));
compressed_in->readStrict(own_compressed_buffer.data(), sizeof(Checksum) + header_size);
readHeaderAndGetCodecAndSize(
own_compressed_buffer.data() + sizeof(Checksum),
header_size,
codec,
size_decompressed,
size_compressed_without_checksum,
allow_different_codecs);
auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer();
@ -184,9 +197,55 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum);
}
ProfileEvents::increment(ProfileEvents::ReadCompressedBytes, size_compressed_without_checksum + sizeof(Checksum));
return size_compressed_without_checksum + sizeof(Checksum);
}
/// Read compressed data into compressed_buffer for asynchronous decompression to avoid the situation of "read compressed block across the compressed_in".
size_t CompressedReadBufferBase::readCompressedDataBlockForAsynchronous(size_t & size_decompressed, size_t & size_compressed_without_checksum)
{
UInt8 header_size = ICompressionCodec::getHeaderSize();
/// Make sure the whole header located in 'compressed_in->' buffer.
if (compressed_in->eof() || (compressed_in->available() < (header_size + sizeof(Checksum))))
return 0;
own_compressed_buffer.resize(header_size + sizeof(Checksum));
compressed_in->readStrict(own_compressed_buffer.data(), sizeof(Checksum) + header_size);
readHeaderAndGetCodecAndSize(
own_compressed_buffer.data() + sizeof(Checksum),
header_size,
codec,
size_decompressed,
size_compressed_without_checksum,
allow_different_codecs);
auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer();
/// Make sure the whole compressed block located in 'compressed_in->' buffer.
/// Otherwise, abandon header and restore original offset of compressed_in
if (compressed_in->offset() >= header_size + sizeof(Checksum) &&
compressed_in->available() >= (size_compressed_without_checksum - header_size) + additional_size_at_the_end_of_buffer + sizeof(Checksum))
{
compressed_in->position() -= header_size;
compressed_buffer = compressed_in->position();
compressed_in->position() += size_compressed_without_checksum;
if (!disable_checksum)
{
Checksum & checksum = *reinterpret_cast<Checksum *>(own_compressed_buffer.data());
validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum);
}
ProfileEvents::increment(ProfileEvents::ReadCompressedBytes, size_compressed_without_checksum + sizeof(Checksum));
return size_compressed_without_checksum + sizeof(Checksum);
}
else
{
compressed_in->position() -= (sizeof(Checksum) + header_size);
return 0;
}
}
static void readHeaderAndGetCodec(const char * compressed_buffer, size_t size_decompressed, CompressionCodecPtr & codec, bool allow_different_codecs)
{
@ -216,14 +275,12 @@ static void readHeaderAndGetCodec(const char * compressed_buffer, size_t size_de
}
}
void CompressedReadBufferBase::decompressTo(char * to, size_t size_decompressed, size_t size_compressed_without_checksum)
{
readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs);
codec->decompress(compressed_buffer, size_compressed_without_checksum, to);
}
void CompressedReadBufferBase::decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum)
{
readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs);
@ -245,6 +302,17 @@ void CompressedReadBufferBase::decompress(BufferBase::Buffer & to, size_t size_d
codec->decompress(compressed_buffer, size_compressed_without_checksum, to.begin());
}
void CompressedReadBufferBase::flushAsynchronousDecompressRequests() const
{
if (codec)
codec->flushAsynchronousDecompressRequests();
}
void CompressedReadBufferBase::setDecompressMode(ICompressionCodec::CodecMode mode) const
{
if (codec)
codec->setDecompressMode(mode);
}
/// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'.
CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in, bool allow_different_codecs_)
@ -253,7 +321,7 @@ CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in, bool allow_d
}
CompressedReadBufferBase::~CompressedReadBufferBase() = default; /// Proper destruction of unique_ptr of forward-declared type.
CompressedReadBufferBase::~CompressedReadBufferBase() = default; /// Proper destruction of unique_ptr of forward-declared type.
}
View File
@ -39,6 +39,17 @@ protected:
/// Returns number of compressed bytes read.
size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum, bool always_copy);
/// Read compressed data into compressed_buffer for asynchronous decompression to avoid the situation of "read compressed block across the compressed_in".
///
/// Compressed block may not be completely contained in "compressed_in" buffer which means compressed block may be read across the "compressed_in".
/// For native LZ4/ZSTD, it has no problem in facing situation above because they are synchronous.
/// But for asynchronous decompression, such as QPL deflate, it requires source and target buffer for decompression can not be overwritten until execution complete.
///
/// Returns number of compressed bytes read.
/// If Returns value > 0, means the address range for current block are maintained in "compressed_in", then asynchronous decompression can be called to boost performance.
/// If Returns value == 0, it means current block cannot be decompressed asynchronously.Meanwhile, asynchronous requests for previous blocks should be flushed if any.
size_t readCompressedDataBlockForAsynchronous(size_t & size_decompressed, size_t & size_compressed_without_checksum);
/// Decompress into memory pointed by `to`
void decompressTo(char * to, size_t size_decompressed, size_t size_compressed_without_checksum);
@ -46,6 +57,14 @@ protected:
/// It is more efficient for compression codec NONE but not suitable if you want to decompress into specific location.
void decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum);
/// Flush all asynchronous decompress request.
void flushAsynchronousDecompressRequests() const;
/// Set decompression mode: Synchronous/Asynchronous/SoftwareFallback.
/// The mode is "Synchronous" by default.
/// flushAsynchronousDecompressRequests must be called subsequently once set "Asynchronous" mode.
void setDecompressMode(ICompressionCodec::CodecMode mode) const;
public:
/// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'.
explicit CompressedReadBufferBase(ReadBuffer * in = nullptr, bool allow_different_codecs_ = false);
View File
@ -91,6 +91,9 @@ void CompressedReadBufferFromFile::seek(size_t offset_in_compressed_file, size_t
size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
{
size_t bytes_read = 0;
/// The codec mode is only relevant for codecs which support hardware offloading.
ICompressionCodec::CodecMode decompress_mode = ICompressionCodec::CodecMode::Synchronous;
bool read_tail = false;
/// If there are unread bytes in the buffer, then we copy needed to `to`.
if (pos < working_buffer.end())
@ -102,10 +105,28 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
size_t size_decompressed = 0;
size_t size_compressed_without_checksum = 0;
size_t new_size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false);
///Try to read block which is entirely located in a single 'compressed_in->' buffer.
size_t new_size_compressed = readCompressedDataBlockForAsynchronous(size_decompressed, size_compressed_without_checksum);
if (new_size_compressed)
{
/// Current block is entirely located in a single 'compressed_in->' buffer.
/// We can set asynchronous decompression mode if supported to boost performance.
decompress_mode = ICompressionCodec::CodecMode::Asynchronous;
}
else
{
/// Current block cannot be decompressed asynchronously, means it probably span across two compressed_in buffers.
/// Meanwhile, asynchronous requests for previous blocks should be flushed if any.
flushAsynchronousDecompressRequests();
/// Fallback to generic API
new_size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false);
decompress_mode = ICompressionCodec::CodecMode::Synchronous;
}
size_compressed = 0; /// file_in no longer points to the end of the block in working_buffer.
if (!new_size_compressed)
return bytes_read;
break;
auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer();
@ -113,6 +134,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
/// need to skip some bytes in decompressed data (seek happened before readBig call).
if (nextimpl_working_buffer_offset == 0 && size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
{
setDecompressMode(decompress_mode);
decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum);
bytes_read += size_decompressed;
bytes += size_decompressed;
@ -127,6 +149,8 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
assert(size_decompressed + additional_size_at_the_end_of_buffer > 0);
memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
working_buffer = Buffer(memory.data(), &memory[size_decompressed]);
/// Synchronous mode must be set since we need read partial data immediately from working buffer to target buffer.
setDecompressMode(ICompressionCodec::CodecMode::Synchronous);
decompress(working_buffer, size_decompressed, size_compressed_without_checksum);
/// Read partial data from first block. Won't run here at second block.
@ -145,15 +169,25 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
assert(size_decompressed + additional_size_at_the_end_of_buffer > 0);
memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
working_buffer = Buffer(memory.data(), &memory[size_decompressed]);
// Asynchronous mode can be set here because working_buffer wouldn't be overwritten any more since this is the last block.
setDecompressMode(ICompressionCodec::CodecMode::Asynchronous);
decompress(working_buffer, size_decompressed, size_compressed_without_checksum);
///Read partial data from last block.
pos = working_buffer.begin();
bytes_read += read(to + bytes_read, n - bytes_read);
read_tail = true;
break;
}
}
/// Here we must make sure all asynchronous requests above are completely done.
flushAsynchronousDecompressRequests();
if (read_tail)
{
/// Manually take nextimpl_working_buffer_offset into account, because we don't use
/// nextImpl in this method.
pos = working_buffer.begin();
bytes_read += read(to + bytes_read, n - bytes_read);
}
return bytes_read;
}

View File
#ifdef ENABLE_QPL_COMPRESSION
#include <cstdio>
#include <thread>
#include <Compression/CompressionCodecDeflateQpl.h>
#include <Compression/CompressionFactory.h>
#include <Compression/CompressionInfo.h>
#include <Parsers/ASTIdentifier.h>
#include <Poco/Logger.h>
#include <Common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_COMPRESS;
extern const int CANNOT_DECOMPRESS;
}
std::array<qpl_job *, DeflateQplJobHWPool::MAX_HW_JOB_NUMBER> DeflateQplJobHWPool::hw_job_ptr_pool;
std::array<std::atomic_bool, DeflateQplJobHWPool::MAX_HW_JOB_NUMBER> DeflateQplJobHWPool::hw_job_ptr_locks;
bool DeflateQplJobHWPool::job_pool_ready = false;
std::unique_ptr<uint8_t[]> DeflateQplJobHWPool::hw_jobs_buffer;
DeflateQplJobHWPool & DeflateQplJobHWPool::instance()
{
static DeflateQplJobHWPool pool;
return pool;
}
DeflateQplJobHWPool::DeflateQplJobHWPool()
:random_engine(std::random_device()())
,distribution(0, MAX_HW_JOB_NUMBER-1)
{
Poco::Logger * log = &Poco::Logger::get("DeflateQplJobHWPool");
UInt32 job_size = 0;
const char * qpl_version = qpl_get_library_version();
/// Get size required for saving a single qpl job object
qpl_get_job_size(qpl_path_hardware, &job_size);
/// Allocate entire buffer for storing all job objects
hw_jobs_buffer = std::make_unique<uint8_t[]>(job_size * MAX_HW_JOB_NUMBER);
/// Initialize pool for storing all job object pointers
/// Slice the buffer: each job object gets its own region at the corresponding address offset.
for (UInt32 index = 0; index < MAX_HW_JOB_NUMBER; ++index)
{
qpl_job * qpl_job_ptr = reinterpret_cast<qpl_job *>(hw_jobs_buffer.get() + index * job_size);
if (qpl_init_job(qpl_path_hardware, qpl_job_ptr) != QPL_STS_OK)
{
job_pool_ready = false;
LOG_WARNING(log, "Initialization of hardware-assisted DeflateQpl codec failed, falling back to software DeflateQpl codec. Please check if Intel In-Memory Analytics Accelerator (IAA) is properly set up. QPL Version: {}.",qpl_version);
return;
}
hw_job_ptr_pool[index] = qpl_job_ptr;
unLockJob(index);
}
job_pool_ready = true;
LOG_DEBUG(log, "Hardware-assisted DeflateQpl codec is ready! QPL Version: {}",qpl_version);
}
DeflateQplJobHWPool::~DeflateQplJobHWPool()
{
for (UInt32 i = 0; i < MAX_HW_JOB_NUMBER; ++i)
{
if (hw_job_ptr_pool[i])
{
while (!tryLockJob(i));
qpl_fini_job(hw_job_ptr_pool[i]);
unLockJob(i);
hw_job_ptr_pool[i] = nullptr;
}
}
job_pool_ready = false;
}
qpl_job * DeflateQplJobHWPool::acquireJob(UInt32 &job_id)
{
if (isJobPoolReady())
{
UInt32 retry = 0;
auto index = distribution(random_engine);
while (!tryLockJob(index))
{
index = distribution(random_engine);
retry++;
if (retry > MAX_HW_JOB_NUMBER)
{
return nullptr;
}
}
job_id = MAX_HW_JOB_NUMBER - index;
assert(index < MAX_HW_JOB_NUMBER);
return hw_job_ptr_pool[index];
}
else
return nullptr;
}
void DeflateQplJobHWPool::releaseJob(UInt32 job_id)
{
if (isJobPoolReady())
unLockJob(MAX_HW_JOB_NUMBER - job_id);
}
bool DeflateQplJobHWPool::tryLockJob(UInt32 index)
{
bool expected = false;
assert(index < MAX_HW_JOB_NUMBER);
return hw_job_ptr_locks[index].compare_exchange_strong(expected, true);
}
void DeflateQplJobHWPool::unLockJob(UInt32 index)
{
assert(index < MAX_HW_JOB_NUMBER);
hw_job_ptr_locks[index].store(false);
}
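
The acquire/release pair above implements a lock-free pool: a random slot is probed and claimed with a compare-and-swap, and a bounded number of retries is allowed before the caller gives up and falls back to the software codec. A minimal sketch of that strategy follows; the pool size and retry limit are illustrative, not the real MAX_HW_JOB_NUMBER.

#include <array>
#include <atomic>
#include <cstdio>
#include <random>

constexpr unsigned POOL_SIZE = 8; /// illustrative, not the real pool size
std::array<std::atomic_bool, POOL_SIZE> locks{};

int acquireSlot(std::mt19937 & rng)
{
    std::uniform_int_distribution<unsigned> dist(0, POOL_SIZE - 1);
    for (unsigned retry = 0; retry <= POOL_SIZE; ++retry)
    {
        unsigned index = dist(rng);
        bool expected = false;
        if (locks[index].compare_exchange_strong(expected, true))
            return static_cast<int>(index); /// slot acquired
    }
    return -1; /// pool looks exhausted, caller falls back to the software codec
}

void releaseSlot(unsigned index) { locks[index].store(false); }

int main()
{
    std::mt19937 rng(std::random_device{}());
    int slot = acquireSlot(rng);
    if (slot < 0)
        std::printf("no free slot, fall back to software codec\n");
    else
    {
        std::printf("acquired slot %d\n", slot);
        releaseSlot(static_cast<unsigned>(slot));
    }
}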
// HardwareCodecDeflateQpl
HardwareCodecDeflateQpl::HardwareCodecDeflateQpl()
:log(&Poco::Logger::get("HardwareCodecDeflateQpl"))
{
}
HardwareCodecDeflateQpl::~HardwareCodecDeflateQpl()
{
#ifndef NDEBUG
assert(decomp_async_job_map.empty());
#else
if (!decomp_async_job_map.empty())
{
LOG_WARNING(log, "Find un-released job when HardwareCodecDeflateQpl destroy");
for (auto it : decomp_async_job_map)
{
DeflateQplJobHWPool::instance().releaseJob(it.first);
}
decomp_async_job_map.clear();
}
#endif
}
Int32 HardwareCodecDeflateQpl::doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) const
{
UInt32 job_id = 0;
qpl_job* job_ptr = nullptr;
UInt32 compressed_size = 0;
if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id)))
{
LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doCompressData->acquireJob fail, probably job pool exhausted)");
return RET_ERROR;
}
job_ptr->op = qpl_op_compress;
job_ptr->next_in_ptr = reinterpret_cast<uint8_t *>(const_cast<char *>(source));
job_ptr->next_out_ptr = reinterpret_cast<uint8_t *>(dest);
job_ptr->available_in = source_size;
job_ptr->level = qpl_default_level;
job_ptr->available_out = dest_size;
job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_DYNAMIC_HUFFMAN | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY;
if (auto status = qpl_execute_job(job_ptr); status == QPL_STS_OK)
{
compressed_size = job_ptr->total_out;
DeflateQplJobHWPool::instance().releaseJob(job_id);
return compressed_size;
}
else
{
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doCompressData->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status);
DeflateQplJobHWPool::instance().releaseJob(job_id);
return RET_ERROR;
}
}
Int32 HardwareCodecDeflateQpl::doDecompressDataSynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size)
{
UInt32 job_id = 0;
qpl_job * job_ptr = nullptr;
UInt32 decompressed_size = 0;
if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id)))
{
LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataSynchronous->acquireJob fail, probably job pool exhausted)");
return RET_ERROR;
}
// Performing a decompression operation
job_ptr->op = qpl_op_decompress;
job_ptr->next_in_ptr = reinterpret_cast<uint8_t *>(const_cast<char *>(source));
job_ptr->next_out_ptr = reinterpret_cast<uint8_t *>(dest);
job_ptr->available_in = source_size;
job_ptr->available_out = uncompressed_size;
job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST;
if (auto status = qpl_submit_job(job_ptr); status != QPL_STS_OK)
{
DeflateQplJobHWPool::instance().releaseJob(job_id);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataSynchronous->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status);
return RET_ERROR;
}
/// Busy wait till the job completes.
do
{
_tpause(1, __rdtsc() + 1000);
} while (qpl_check_job(job_ptr) == QPL_STS_BEING_PROCESSED);
decompressed_size = job_ptr->total_out;
DeflateQplJobHWPool::instance().releaseJob(job_id);
return decompressed_size;
}
Int32 HardwareCodecDeflateQpl::doDecompressDataAsynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size)
{
UInt32 job_id = 0;
qpl_job * job_ptr = nullptr;
if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id)))
{
LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataAsynchronous->acquireJob fail, probably job pool exhausted)");
return RET_ERROR;
}
// Performing a decompression operation
job_ptr->op = qpl_op_decompress;
job_ptr->next_in_ptr = reinterpret_cast<uint8_t *>(const_cast<char *>(source));
job_ptr->next_out_ptr = reinterpret_cast<uint8_t *>(dest);
job_ptr->available_in = source_size;
job_ptr->available_out = uncompressed_size;
job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST;
if (auto status = qpl_submit_job(job_ptr); status == QPL_STS_OK)
{
decomp_async_job_map.insert({job_id, job_ptr});
return job_id;
}
else
{
DeflateQplJobHWPool::instance().releaseJob(job_id);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataAsynchronous->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status);
return RET_ERROR;
}
}
void HardwareCodecDeflateQpl::flushAsynchronousDecompressRequests()
{
UInt32 n_jobs_processing = decomp_async_job_map.size();
std::map<UInt32, qpl_job *>::iterator it = decomp_async_job_map.begin();
while (n_jobs_processing)
{
UInt32 job_id = 0;
qpl_job * job_ptr = nullptr;
job_id = it->first;
job_ptr = it->second;
if (qpl_check_job(job_ptr) == QPL_STS_BEING_PROCESSED)
{
it++;
}
else
{
it = decomp_async_job_map.erase(it);
DeflateQplJobHWPool::instance().releaseJob(job_id);
n_jobs_processing--;
if (n_jobs_processing <= 0)
break;
}
if (it == decomp_async_job_map.end())
{
it = decomp_async_job_map.begin();
_tpause(1, __rdtsc() + 1000);
}
}
}
SoftwareCodecDeflateQpl::~SoftwareCodecDeflateQpl()
{
if (sw_job)
qpl_fini_job(sw_job);
}
qpl_job * SoftwareCodecDeflateQpl::getJobCodecPtr()
{
if (!sw_job)
{
UInt32 size = 0;
qpl_get_job_size(qpl_path_software, &size);
sw_buffer = std::make_unique<uint8_t[]>(size);
sw_job = reinterpret_cast<qpl_job *>(sw_buffer.get());
// Job initialization
if (auto status = qpl_init_job(qpl_path_software, sw_job); status != QPL_STS_OK)
throw Exception(ErrorCodes::CANNOT_COMPRESS,
"Initialization of DeflateQpl software fallback codec failed. (Details: qpl_init_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status);
}
return sw_job;
}
UInt32 SoftwareCodecDeflateQpl::doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size)
{
qpl_job * job_ptr = getJobCodecPtr();
// Performing a compression operation
job_ptr->op = qpl_op_compress;
job_ptr->next_in_ptr = reinterpret_cast<uint8_t *>(const_cast<char *>(source));
job_ptr->next_out_ptr = reinterpret_cast<uint8_t *>(dest);
job_ptr->available_in = source_size;
job_ptr->available_out = dest_size;
job_ptr->level = qpl_default_level;
job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_DYNAMIC_HUFFMAN | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY;
if (auto status = qpl_execute_job(job_ptr); status != QPL_STS_OK)
throw Exception(ErrorCodes::CANNOT_COMPRESS,
"Execution of DeflateQpl software fallback codec failed. (Details: qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status);
return job_ptr->total_out;
}
void SoftwareCodecDeflateQpl::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size)
{
qpl_job * job_ptr = getJobCodecPtr();
// Performing a decompression operation
job_ptr->op = qpl_op_decompress;
job_ptr->next_in_ptr = reinterpret_cast<uint8_t *>(const_cast<char *>(source));
job_ptr->next_out_ptr = reinterpret_cast<uint8_t *>(dest);
job_ptr->available_in = source_size;
job_ptr->available_out = uncompressed_size;
job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST;
if (auto status = qpl_execute_job(job_ptr); status != QPL_STS_OK)
throw Exception(ErrorCodes::CANNOT_DECOMPRESS,
"Execution of DeflateQpl software fallback codec failed. (Details: qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status);
}
// CompressionCodecDeflateQpl
CompressionCodecDeflateQpl::CompressionCodecDeflateQpl()
:hw_codec(std::make_unique<HardwareCodecDeflateQpl>())
,sw_codec(std::make_unique<SoftwareCodecDeflateQpl>())
{
setCodecDescription("DEFLATE_QPL");
}
uint8_t CompressionCodecDeflateQpl::getMethodByte() const
{
return static_cast<uint8_t>(CompressionMethodByte::DeflateQpl);
}
void CompressionCodecDeflateQpl::updateHash(SipHash & hash) const
{
getCodecDesc()->updateTreeHash(hash);
}
UInt32 CompressionCodecDeflateQpl::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
/// Aligned with ZLIB
return ((uncompressed_size) + ((uncompressed_size) >> 12) + ((uncompressed_size) >> 14) + ((uncompressed_size) >> 25) + 13);
}
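For a sense of scale (illustrative arithmetic, not part of the patch): with uncompressed_size = 1 MiB (1048576 bytes) this reserves 1048576 + 256 + 64 + 0 + 13 = 1048909 bytes, i.e. about 0.03% of slack on top of the uncompressed size, which matches zlib's compressBound estimate.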
UInt32 CompressionCodecDeflateQpl::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
Int32 res = HardwareCodecDeflateQpl::RET_ERROR;
if (DeflateQplJobHWPool::instance().isJobPoolReady())
res = hw_codec->doCompressData(source, source_size, dest, getMaxCompressedDataSize(source_size));
if (res == HardwareCodecDeflateQpl::RET_ERROR)
res = sw_codec->doCompressData(source, source_size, dest, getMaxCompressedDataSize(source_size));
return res;
}
void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
{
switch (getDecompressMode())
{
case CodecMode::Synchronous:
{
Int32 res = HardwareCodecDeflateQpl::RET_ERROR;
if (DeflateQplJobHWPool::instance().isJobPoolReady())
{
res = hw_codec->doDecompressDataSynchronous(source, source_size, dest, uncompressed_size);
if (res == HardwareCodecDeflateQpl::RET_ERROR)
sw_codec->doDecompressData(source, source_size, dest, uncompressed_size);
}
else
sw_codec->doDecompressData(source, source_size, dest, uncompressed_size);
return;
}
case CodecMode::Asynchronous:
{
Int32 res = HardwareCodecDeflateQpl::RET_ERROR;
if (DeflateQplJobHWPool::instance().isJobPoolReady())
res = hw_codec->doDecompressDataAsynchronous(source, source_size, dest, uncompressed_size);
if (res == HardwareCodecDeflateQpl::RET_ERROR)
sw_codec->doDecompressData(source, source_size, dest, uncompressed_size);
return;
}
case CodecMode::SoftwareFallback:
sw_codec->doDecompressData(source, source_size, dest, uncompressed_size);
return;
}
__builtin_unreachable();
}
void CompressionCodecDeflateQpl::flushAsynchronousDecompressRequests()
{
if (DeflateQplJobHWPool::instance().isJobPoolReady())
hw_codec->flushAsynchronousDecompressRequests();
/// After flushing all previous async requests, we must restore the decompression mode to synchronous by default.
setDecompressMode(CodecMode::Synchronous);
}
void registerCodecDeflateQpl(CompressionCodecFactory & factory)
{
factory.registerSimpleCompressionCodec(
"DEFLATE_QPL", static_cast<char>(CompressionMethodByte::DeflateQpl), [&]() { return std::make_shared<CompressionCodecDeflateQpl>(); });
}
}
#endif

View File

@ -0,0 +1,120 @@
#pragma once
#include <Compression/ICompressionCodec.h>
#include <qpl/qpl.h>
#include <random>
namespace Poco
{
class Logger;
}
namespace DB
{
/// DeflateQplJobHWPool is a resource pool that provides job objects.
/// A job object stores context information while a compression job is offloaded to the HW accelerator.
class DeflateQplJobHWPool
{
public:
DeflateQplJobHWPool();
~DeflateQplJobHWPool();
qpl_job * acquireJob(UInt32 &job_id);
static void releaseJob(UInt32 job_id);
static const bool & isJobPoolReady() { return job_pool_ready; }
static DeflateQplJobHWPool & instance();
private:
static bool tryLockJob(UInt32 index);
static void unLockJob(UInt32 index);
/// Maximum number of jobs running in parallel supported by IAA hardware
static constexpr auto MAX_HW_JOB_NUMBER = 1024;
/// Entire buffer for storing all job objects
static std::unique_ptr<uint8_t[]> hw_jobs_buffer;
/// Job pool for storing all job object pointers
static std::array<qpl_job *, DeflateQplJobHWPool::MAX_HW_JOB_NUMBER> hw_job_ptr_pool;
/// Locks for accessing each job object pointer
static std::array<std::atomic_bool, DeflateQplJobHWPool::MAX_HW_JOB_NUMBER> hw_job_ptr_locks;
static bool job_pool_ready;
std::mt19937 random_engine;
std::uniform_int_distribution<int> distribution;
};
class SoftwareCodecDeflateQpl
{
public:
~SoftwareCodecDeflateQpl();
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size);
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size);
private:
qpl_job * sw_job = nullptr;
std::unique_ptr<uint8_t[]> sw_buffer;
qpl_job * getJobCodecPtr();
};
class HardwareCodecDeflateQpl
{
public:
/// RET_ERROR stands for hardware codec failure; need to fall back to the software codec.
static constexpr Int32 RET_ERROR = -1;
HardwareCodecDeflateQpl();
~HardwareCodecDeflateQpl();
Int32 doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) const;
/// Submit a job request to the IAA hardware and then busy wait till it completes.
Int32 doDecompressDataSynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size);
/// Submit a job request to the IAA hardware and return immediately. The IAA hardware will process decompression jobs automatically.
Int32 doDecompressDataAsynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size);
/// Flush results for all previous requests, which means busy waiting till all the jobs in "decomp_async_job_map" are finished.
/// Must be called after several calls of doDecompressDataAsynchronous.
void flushAsynchronousDecompressRequests();
private:
/// Asynchronous job map for decompression: job ID - job object.
/// For each submission, push the job ID and job object into this map;
/// For flush, pop the job ID and job object out of this map. Use the job ID to release the job lock and the job object to check job status till complete.
std::map<UInt32, qpl_job *> decomp_async_job_map;
Poco::Logger * log;
};
class CompressionCodecDeflateQpl : public ICompressionCodec
{
public:
CompressionCodecDeflateQpl();
uint8_t getMethodByte() const override;
void updateHash(SipHash & hash) const override;
protected:
bool isCompression() const override
{
return true;
}
bool isGenericCompression() const override
{
return true;
}
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
/// Flush results for previous asynchronous decompression requests issued in asynchronous mode.
void flushAsynchronousDecompressRequests() override;
private:
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
std::unique_ptr<HardwareCodecDeflateQpl> hw_codec;
std::unique_ptr<SoftwareCodecDeflateQpl> sw_codec;
};
}

View File

@ -166,7 +166,7 @@ void registerCodecLZ4(CompressionCodecFactory & factory);
void registerCodecLZ4HC(CompressionCodecFactory & factory);
void registerCodecZSTD(CompressionCodecFactory & factory);
void registerCodecMultiple(CompressionCodecFactory & factory);
void registerCodecDeflateQpl(CompressionCodecFactory & factory);
/// Keeper use only general-purpose codecs, so we don't need these special codecs
/// in standalone build
@ -188,7 +188,6 @@ CompressionCodecFactory::CompressionCodecFactory()
registerCodecZSTD(*this);
registerCodecLZ4HC(*this);
registerCodecMultiple(*this);
#ifndef KEEPER_STANDALONE_BUILD
registerCodecDelta(*this);
registerCodecT64(*this);
@ -196,6 +195,9 @@ CompressionCodecFactory::CompressionCodecFactory()
registerCodecGorilla(*this);
registerCodecEncrypted(*this);
registerCodecFPC(*this);
#ifdef ENABLE_QPL_COMPRESSION
registerCodecDeflateQpl(*this);
#endif
#endif
default_codec = get("LZ4", {});

View File

@ -45,7 +45,8 @@ enum class CompressionMethodByte : uint8_t
Gorilla = 0x95,
AES_128_GCM_SIV = 0x96,
AES_256_GCM_SIV = 0x97,
FPC = 0x98
FPC = 0x98,
DeflateQpl = 0x99,
};
}

View File

@ -91,7 +91,6 @@ UInt32 ICompressionCodec::compress(const char * source, UInt32 source_size, char
return header_size + compressed_bytes_written;
}
UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, char * dest) const
{
assert(source != nullptr && dest != nullptr);

View File

@ -45,9 +45,37 @@ public:
/// Compressed bytes from uncompressed source to dest. Dest should preallocate memory
UInt32 compress(const char * source, UInt32 source_size, char * dest) const;
/// Decompress bytes from compressed source to dest. Dest should preallocate memory
/// Decompress bytes from compressed source to dest. Dest should preallocate memory;
UInt32 decompress(const char * source, UInt32 source_size, char * dest) const;
/// Three kinds of codec mode:
/// Synchronous mode which is commonly used by default;
/// --- For a codec with a HW decompressor, it means submitting the request to HW and busy waiting till it completes.
/// Asynchronous mode which requires HW decompressor support;
/// --- For a codec with a HW decompressor, it means submitting the request to HW and returning immediately.
/// --- Must be used in a pair with flushAsynchronousDecompressRequests.
/// SoftwareFallback mode is defined exclusively for codecs with a HW decompressor and enables their capability to fall back to the SW codec.
enum class CodecMode
{
Synchronous,
Asynchronous,
SoftwareFallback
};
/// Get current decompression mode
CodecMode getDecompressMode() const { return decompressMode; }
/// If the mode is set to CodecMode::Asynchronous, it must be followed by flushAsynchronousDecompressRequests.
void setDecompressMode(CodecMode mode) { decompressMode = mode; }
/// Flush results for previous asynchronous decompression requests.
/// This function must be called after several requests have been offloaded to HW,
/// to make sure the asynchronous results have been completely flushed into the target buffer.
/// Meanwhile, the source and target buffers for decompression cannot be overwritten until this function has completed.
/// Otherwise it would conflict with the HW offloading and cause an exception.
/// For QPL deflate, the maximum number of in-flight requests equals DeflateQplJobHWPool::MAX_HW_JOB_NUMBER.
virtual void flushAsynchronousDecompressRequests() {}
/// Number of bytes, that will be used to compress uncompressed_size bytes with current codec
virtual UInt32 getCompressedReserveSize(UInt32 uncompressed_size) const
{
@ -103,6 +131,7 @@ protected:
private:
ASTPtr full_codec_desc;
CodecMode decompressMode{CodecMode::Synchronous};
};
}

View File

@ -24,6 +24,12 @@
#include <arm_neon.h>
#endif
static inline UInt16 LZ4_readLE16(const void* mem_ptr)
{
const UInt8* p = reinterpret_cast<const UInt8*>(mem_ptr);
return static_cast<UInt16>(p[0]) + (p[1] << 8);
}
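To illustrate (not part of the patch): for the two bytes {0x01, 0x02} this helper returns 0x01 + (0x02 << 8) = 0x0201 = 513 on any host, whereas the unalignedLoad<UInt16> it replaces below would yield 0x0102 = 258 on a big-endian machine, so the explicit little-endian read keeps the LZ4 match-offset decoding portable.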
namespace LZ4
{
@ -561,7 +567,7 @@ bool NO_INLINE decompressImpl(
/// Get match offset.
size_t offset = unalignedLoad<UInt16>(ip);
size_t offset = LZ4_readLE16(ip);
ip += 2;
const UInt8 * match = op - offset;

View File

@ -769,6 +769,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, output_format_pretty_row_numbers, false, "Add row numbers before each row for pretty output format", 0) \
M(Bool, insert_distributed_one_random_shard, false, "If setting is enabled, inserting into distributed table will choose a random shard to write when there is no sharding key", 0) \
\
M(Bool, exact_rows_before_limit, false, "When enabled, ClickHouse will provide the exact value for the rows_before_limit_at_least statistic, at the cost of reading the data before the limit completely", 0) \
M(UInt64, cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if there're joining expressions in the WHERE section. Values: 0 - no rewrite, 1 - apply if possible for comma/cross, 2 - force rewrite all comma joins, cross - if possible", 0) \
\
M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \

View File

@ -511,8 +511,6 @@ void SerializationLowCardinality::serializeBinaryBulkWithMultipleStreams(
/// Insert used_keys into global dictionary and update sub_index.
auto indexes_with_overflow = global_dictionary->uniqueInsertRangeWithOverflow(*keys, 0, keys->size(),
settings.low_cardinality_max_dictionary_size);
// size_t max_size = settings.low_cardinality_max_dictionary_size + indexes_with_overflow.overflowed_keys->size();
// ColumnLowCardinality::Index(indexes_with_overflow.indexes->getPtr()).check(max_size);
if (global_dictionary->size() > settings.low_cardinality_max_dictionary_size)
throw Exception("Got dictionary with size " + toString(global_dictionary->size()) +
@ -656,11 +654,6 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams(
{
auto maps = mapIndexWithAdditionalKeys(*indexes_column, global_dictionary->size());
// ColumnLowCardinality::Index(maps.additional_keys_map->getPtr()).check(additional_keys->size());
// ColumnLowCardinality::Index(indexes_column->getPtr()).check(
// maps.dictionary_map->size() + maps.additional_keys_map->size());
auto used_keys = IColumn::mutate(global_dictionary->getNestedColumn()->index(*maps.dictionary_map, 0));
if (!maps.additional_keys_map->empty())

View File

@ -78,9 +78,6 @@ private:
template <typename ... Params, typename... Args>
void deserializeImpl(IColumn & column, DeserializeFunctionPtr<Params...> func, Args &&... args) const;
// template <typename Creator>
// static MutableColumnUniquePtr createColumnUniqueImpl(const IDataType & keys_type, const Creator & creator);
};
}

View File

@ -27,11 +27,6 @@
#include <Dictionaries/DictionaryHelpers.h>
namespace CurrentMetrics
{
extern const Metric Write;
}
namespace ProfileEvents
{
extern const Event FileOpen;
@ -527,8 +522,6 @@ public:
throw Exception(ErrorCodes::CANNOT_IO_SUBMIT, "Cannot submit request for asynchronous IO on file {}", file_path);
}
// CurrentMetrics::Increment metric_increment_write{CurrentMetrics::Write};
io_event event;
while (io_getevents(aio_context.ctx, 1, 1, &event, nullptr) < 0)

View File

@ -275,8 +275,6 @@ void registerDictionarySourceJDBC(DictionarySourceFactory & factory)
bool /* created_from_ddl */) -> DictionarySourcePtr {
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
"Dictionary source of type `jdbc` is disabled until consistent support for nullable fields.");
// BridgeHelperPtr bridge = std::make_shared<XDBCBridgeHelper<JDBCBridgeMixin>>(config, context.getSettings().http_receive_timeout, config.getString(config_prefix + ".connection_string"));
// return std::make_unique<XDBCDictionarySource>(dict_struct, config, config_prefix + ".jdbc", sample_block, context, bridge);
};
factory.registerSource("jdbc", create_table_source);
}

View File

@ -74,7 +74,6 @@ void DiskWebServer::initialize(const String & uri_path) const
if (file_data.type == FileType::Directory)
{
directories_to_load.push_back(file_path);
// file_path = fs::path(file_path) / "";
}
file_path = file_path.substr(url.size());

View File

@ -168,6 +168,8 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl()
CurrentMetrics::Increment metric_increment{CurrentMetrics::AsynchronousReadWait};
size_t size = 0;
size_t bytes_read = 0;
if (prefetch_future.valid())
{
ProfileEvents::increment(ProfileEvents::RemoteFSPrefetchedReads);
@ -181,6 +183,8 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl()
/// If prefetch_future is valid, size should always be greater than zero.
assert(offset <= size);
bytes_read = size - offset;
ProfileEvents::increment(ProfileEvents::AsynchronousReadWaitMicroseconds, watch.elapsedMicroseconds());
}
@ -200,9 +204,11 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl()
auto offset = result.offset;
LOG_TEST(log, "Current size: {}, offset: {}", size, offset);
assert(offset <= size);
if (size)
assert(offset <= size);
bytes_read = size - offset;
if (bytes_read)
{
/// Adjust the working buffer so that it ignores `offset` bytes.
internal_buffer = Buffer(memory.data(), memory.data() + memory.size());
@ -222,7 +228,7 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl()
assert(file_offset_of_buffer_end <= impl->getFileSize());
prefetch_future = {};
return size;
return bytes_read;
}
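
A small worked example of why the return value changes (illustrative numbers, not from the patch): if a prefetched read returns size = 100 bytes with offset = 10 because the seek position fell inside the prefetched buffer, only bytes_read = 100 - 10 = 90 bytes are exposed to the caller, so returning size here would over-report the amount of data read by the ignored offset.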

View File

@ -55,10 +55,6 @@ if (TARGET ch_contrib::llvm)
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::llvm)
endif ()
if (TARGET ch_contrib::base-x)
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::base-x)
endif()
if (TARGET ch_contrib::base64)
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::base64)
endif()

View File

@ -63,12 +63,10 @@ struct ToStartOfWeekImpl
static inline UInt16 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode);
// return time_zone.toFirstDayNumOfWeek(t, week_mode);
}
static inline UInt16 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode);
// return time_zone.toFirstDayNumOfWeek(t, week_mode);
}
static inline UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{

View File

@ -1,15 +1,13 @@
#pragma once
#include "config_functions.h"
#if USE_BASEX
# include <Columns/ColumnConst.h>
# include <Common/MemorySanitizer.h>
# include <Columns/ColumnString.h>
# include <DataTypes/DataTypeString.h>
# include <Functions/FunctionFactory.h>
# include <Functions/FunctionHelpers.h>
# include <IO/WriteHelpers.h>
# include <base_x.hh>
#include <Columns/ColumnConst.h>
#include <Common/MemorySanitizer.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/base58.h>
#include <cstring>
namespace DB
@ -26,72 +24,37 @@ struct Base58Encode
{
static constexpr auto name = "base58Encode";
static void process(const ColumnString & input, ColumnString::MutablePtr & dst_column, const std::string & alphabet, size_t input_rows_count)
static void process(const ColumnString & src_column, ColumnString::MutablePtr & dst_column, size_t input_rows_count)
{
auto & dst_data = dst_column->getChars();
auto & dst_offsets = dst_column->getOffsets();
/// Wikipedia states Base58 has efficiency of 73%, and we take 1.5 scale to avoid reallocation in most cases
size_t current_allocated_size = ceil(1.5 * input.getChars().size());
/// Base58 has efficiency of 73% (8/11) [https://monerodocs.org/cryptography/base58/],
/// and we take double scale to avoid any reallocation.
dst_data.resize(current_allocated_size);
size_t max_result_size = ceil(2 * src_column.getChars().size() + 1);
dst_data.resize(max_result_size);
dst_offsets.resize(input_rows_count);
const ColumnString::Offsets & src_offsets = input.getOffsets();
const ColumnString::Offsets & src_offsets = src_column.getOffsets();
const auto * source = input.getChars().raw_data();
const auto * src = src_column.getChars().data();
auto * dst = dst_data.data();
auto * dst_pos = dst;
size_t src_offset_prev = 0;
size_t processed_size = 0;
const auto& encoder = (alphabet == "bitcoin") ? Base58::bitcoin() :
((alphabet == "flickr") ? Base58::flickr() :
((alphabet == "ripple") ? Base58::ripple() :
Base58::base58())); //GMP
std::string encoded;
for (size_t row = 0; row < input_rows_count; ++row)
{
size_t srclen = src_offsets[row] - src_offset_prev - 1;
/// Why we didn't use char* here?
/// We don't know the size of the result string beforehand (it's not byte-to-byte encoding),
/// so we may need to do many resizes (the worst case -- we'll do it for each row)
/// This way we do exponential resizes and one final resize after whole operation is complete
encoded.clear();
if (srclen)
try
{
encoder.encode(encoded, source, srclen);
}
catch (const std::invalid_argument& e)
{
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
}
catch (const std::domain_error& e)
{
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
}
size_t outlen = encoded.size();
size_t srclen = src_offsets[row] - src_offset_prev;
auto encoded_size = encodeBase58(src, dst_pos);
if (processed_size + outlen >= current_allocated_size)
{
current_allocated_size += current_allocated_size;
dst_data.resize(current_allocated_size);
auto processed_offset = dst_pos - dst;
dst = dst_data.data();
dst_pos = dst;
dst_pos += processed_offset;
}
std::memcpy(dst_pos, encoded.c_str(), ++outlen);
source += srclen + 1;
dst_pos += outlen;
src += srclen;
dst_pos += encoded_size;
dst_offsets[row] = dst_pos - dst;
src_offset_prev = src_offsets[row];
processed_size += outlen;
}
dst_data.resize(dst_pos - dst);
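On the sizing above (illustrative arithmetic): each Base58 character carries about 5.86 bits, i.e. roughly 73% of a byte, so an n-byte input encodes to at most about 1.37 * n characters; reserving 2 * n + 1 bytes therefore always suffices without any intermediate reallocation, and the decoded value is never longer than its Base58 source.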
@ -102,72 +65,40 @@ struct Base58Decode
{
static constexpr auto name = "base58Decode";
static void process(const ColumnString & input, ColumnString::MutablePtr & dst_column, const std::string & alphabet, size_t input_rows_count)
static void process(const ColumnString & src_column, ColumnString::MutablePtr & dst_column, size_t input_rows_count)
{
auto & dst_data = dst_column->getChars();
auto & dst_offsets = dst_column->getOffsets();
/// We allocate probably even more then needed to avoid many resizes
size_t current_allocated_size = input.getChars().size();
/// Base58 has efficiency of 73% (8/11) [https://monerodocs.org/cryptography/base58/],
/// and decoded value will be no longer than source.
dst_data.resize(current_allocated_size);
size_t max_result_size = src_column.getChars().size() + 1;
dst_data.resize(max_result_size);
dst_offsets.resize(input_rows_count);
const ColumnString::Offsets & src_offsets = input.getOffsets();
const ColumnString::Offsets & src_offsets = src_column.getOffsets();
const auto * source = input.getChars().raw_data();
const auto * src = src_column.getChars().data();
auto * dst = dst_data.data();
auto * dst_pos = dst;
size_t src_offset_prev = 0;
size_t processed_size = 0;
const auto& decoder = (alphabet == "bitcoin") ? Base58::bitcoin() :
((alphabet == "flickr") ? Base58::flickr() :
((alphabet == "ripple") ? Base58::ripple() :
Base58::base58()));
std::string decoded;
for (size_t row = 0; row < input_rows_count; ++row)
{
size_t srclen = src_offsets[row] - src_offset_prev - 1;
/// Why we didn't use char* here?
/// We don't know the size of the result string beforehand (it's not byte-to-byte encoding),
/// so we may need to do many resizes (the worst case -- we'll do it for each row)
/// This way we do exponential resizes and one final resize after whole operation is complete
decoded.clear();
if (srclen)
try
{
decoder.decode(decoded, source, srclen);
}
catch (const std::invalid_argument& e)
{
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
}
catch (const std::domain_error& e)
{
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
}
size_t outlen = decoded.size();
size_t srclen = src_offsets[row] - src_offset_prev;
if (processed_size + outlen >= current_allocated_size)
{
current_allocated_size += current_allocated_size;
dst_data.resize(current_allocated_size);
auto processed_offset = dst_pos - dst;
dst = dst_data.data();
dst_pos = dst;
dst_pos += processed_offset;
}
std::memcpy(dst_pos, decoded.c_str(), ++outlen);
auto decoded_size = decodeBase58(src, dst_pos);
if (!decoded_size)
throw Exception("Invalid Base58 value, cannot be decoded", ErrorCodes::BAD_ARGUMENTS);
source += srclen + 1;
dst_pos += outlen;
src += srclen;
dst_pos += decoded_size;
dst_offsets[row] = dst_pos - dst;
src_offset_prev = src_offsets[row];
processed_size += outlen;
}
dst_data.resize(dst_pos - dst);
@ -190,9 +121,7 @@ public:
return Func::name;
}
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
size_t getNumberOfArguments() const override { return 1; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
@ -202,19 +131,12 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() != 1 && arguments.size() != 2)
throw Exception(
"Wrong number of arguments for function " + getName() + ": 1 or 2 expected.",
ErrorCodes::BAD_ARGUMENTS);
if (arguments.size() != 1)
throw Exception("Wrong number of arguments for function " + getName() + ": 1 expected.", ErrorCodes::BAD_ARGUMENTS);
if (!isString(arguments[0].type))
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of 1st argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (arguments.size() == 2 && !isString(arguments[1].type))
throw Exception(
"Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName() + ". Must be String.",
"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeString>();
@ -229,28 +151,11 @@ public:
"Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName() + ", must be String",
ErrorCodes::ILLEGAL_COLUMN);
std::string alphabet = "bitcoin";
if (arguments.size() == 2)
{
const auto * alphabet_column = checkAndGetColumn<ColumnConst>(arguments[1].column.get());
if (!alphabet_column)
throw Exception("Second argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
alphabet = alphabet_column->getValue<DB::String>();
if (alphabet != "bitcoin" && alphabet != "ripple" && alphabet != "flickr" && alphabet != "gmp")
throw Exception("Second argument for function " + getName() + " must be 'bitcoin', 'ripple', 'gmp' or 'flickr'", ErrorCodes::ILLEGAL_COLUMN);
}
auto dst_column = ColumnString::create();
Func::process(*input, dst_column, alphabet, input_rows_count);
Func::process(*input, dst_column, input_rows_count);
return dst_column;
}
};
}
#endif

View File

@ -1,5 +1,4 @@
#include <Functions/FunctionBase58Conversion.h>
#if USE_BASEX
#include <Functions/FunctionFactory.h>
namespace DB
@ -14,4 +13,3 @@ void registerFunctionBase58Decode(FunctionFactory & factory)
factory.registerFunction<FunctionBase58Conversion<Base58Decode>>();
}
}
#endif

View File

@ -1091,8 +1091,6 @@ struct ConvertThroughParsing
static constexpr bool to_datetime64 = std::is_same_v<ToDataType, DataTypeDateTime64>;
// using ToFieldType = typename ToDataType::FieldType;
static bool isAllRead(ReadBuffer & in)
{
/// In case of FixedString, skip zero bytes at end.

View File

@ -22,7 +22,7 @@ namespace ErrorCodes
struct L1Distance
{
static inline String name = "L1";
static constexpr auto name = "L1";
struct ConstParams {};
@ -53,7 +53,7 @@ struct L1Distance
struct L2Distance
{
static inline String name = "L2";
static constexpr auto name = "L2";
struct ConstParams {};
@ -84,7 +84,7 @@ struct L2Distance
struct L2SquaredDistance : L2Distance
{
static inline String name = "L2Squared";
static constexpr auto name = "L2Squared";
template <typename ResultType>
static ResultType finalize(const State<ResultType> & state, const ConstParams &)
@ -95,7 +95,7 @@ struct L2SquaredDistance : L2Distance
struct LpDistance
{
static inline String name = "Lp";
static constexpr auto name = "Lp";
struct ConstParams
{
@ -130,7 +130,7 @@ struct LpDistance
struct LinfDistance
{
static inline String name = "Linf";
static constexpr auto name = "Linf";
struct ConstParams {};
@ -161,7 +161,7 @@ struct LinfDistance
struct CosineDistance
{
static inline String name = "Cosine";
static constexpr auto name = "Cosine";
struct ConstParams {};
@ -200,8 +200,7 @@ template <class Kernel>
class FunctionArrayDistance : public IFunction
{
public:
static inline auto name = "array" + Kernel::name + "Distance";
String getName() const override { return name; }
String getName() const override { static auto name = String("array") + Kernel::name + "Distance"; return name; }
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayDistance<Kernel>>(); }
size_t getNumberOfArguments() const override { return 2; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; }

View File

@ -21,7 +21,7 @@ namespace ErrorCodes
struct L1Norm
{
static inline String name = "L1";
static constexpr auto name = "L1";
struct ConstParams {};
@ -46,7 +46,7 @@ struct L1Norm
struct L2Norm
{
static inline String name = "L2";
static constexpr auto name = "L2";
struct ConstParams {};
@ -71,7 +71,7 @@ struct L2Norm
struct L2SquaredNorm : L2Norm
{
static inline String name = "L2Squared";
static constexpr auto name = "L2Squared";
template <typename ResultType>
inline static ResultType finalize(ResultType result, const ConstParams &)
@ -83,7 +83,7 @@ struct L2SquaredNorm : L2Norm
struct LpNorm
{
static inline String name = "Lp";
static constexpr auto name = "Lp";
struct ConstParams
{
@ -112,7 +112,7 @@ struct LpNorm
struct LinfNorm
{
static inline String name = "Linf";
static constexpr auto name = "Linf";
struct ConstParams {};
@ -140,8 +140,7 @@ template <class Kernel>
class FunctionArrayNorm : public IFunction
{
public:
static inline auto name = "array" + Kernel::name + "Norm";
String getName() const override { return name; }
String getName() const override { static auto name = String("array") + Kernel::name + "Norm"; return name; }
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayNorm<Kernel>>(); }
size_t getNumberOfArguments() const override { return 1; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; }

View File

@ -44,7 +44,10 @@ public:
return 0;
}
bool isDeterministic() const override { return false; }
bool isDeterministic() const override
{
return false;
}
bool isDeterministicInScopeOfQuery() const override
{

View File

@ -2,7 +2,6 @@
// .h autogenerated by cmake!
#cmakedefine01 USE_BASEX
#cmakedefine01 USE_BASE64
#cmakedefine01 USE_SIMDJSON
#cmakedefine01 USE_RAPIDJSON

View File

@ -1,9 +1,6 @@
if (TARGET ch_contrib::fastops)
set(USE_FASTOPS 1)
endif()
if (TARGET ch_contrib::base-x)
set(USE_BASEX 1)
endif()
if (TARGET ch_contrib::base64)
set(USE_BASE64 1)
endif()

View File

@ -3,7 +3,6 @@
#include <Functions/IFunction.h>
#include <Core/DecimalFunctions.h>
#include <Functions/FunctionFactory.h>
#include <Core/Field.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>

View File

@ -0,0 +1,88 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Columns/ColumnsNumber.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
/** Returns current time at calculation of every block.
* In contrast to the 'now' function, it's not a constant expression and is not subject to constant folding.
*/
class FunctionNowInBlock : public IFunction
{
public:
static constexpr auto name = "nowInBlock";
static FunctionPtr create(ContextPtr)
{
return std::make_shared<FunctionNowInBlock>();
}
String getName() const override
{
return name;
}
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override
{
return false;
}
/// Optional timezone argument.
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool isDeterministic() const override
{
return false;
}
bool isDeterministicInScopeOfQuery() const override
{
return false;
}
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() > 1)
{
throw Exception("Arguments size of function " + getName() + " should be 0 or 1", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (arguments.size() == 1 && !isStringOrFixedString(arguments[0].type))
{
throw Exception(
"Arguments of function " + getName() + " should be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
if (arguments.size() == 1)
{
return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 0, 0));
}
return std::make_shared<DataTypeDateTime>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
{
return ColumnUInt32::create(input_rows_count, time(nullptr));
}
};
}
void registerFunctionNowInBlock(FunctionFactory & factory)
{
factory.registerFunction<FunctionNowInBlock>();
}
}
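
An illustrative query (not part of the patch) that shows the difference: SELECT now(), nowInBlock(), sleep(1) FROM numbers(3) SETTINGS max_block_size = 1 returns the same now() value in every row, because now() is constant-folded once per query, while nowInBlock() advances by roughly one second per row, since each single-row block is evaluated after the previous block's sleep.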

View File

@ -43,6 +43,7 @@ void registerFunctionToRelativeMinuteNum(FunctionFactory &);
void registerFunctionToRelativeSecondNum(FunctionFactory &);
void registerFunctionToTime(FunctionFactory &);
void registerFunctionNow(FunctionFactory &);
void registerFunctionNowInBlock(FunctionFactory &);
void registerFunctionNow64(FunctionFactory &);
void registerFunctionToday(FunctionFactory &);
void registerFunctionYesterday(FunctionFactory &);
@ -126,6 +127,7 @@ void registerFunctionsDateTime(FunctionFactory & factory)
registerFunctionToTime(factory);
registerFunctionNow(factory);
registerFunctionNow64(factory);
registerFunctionNowInBlock(factory);
registerFunctionToday(factory);
registerFunctionYesterday(factory);
registerFunctionTimeSlot(factory);

View File

@ -49,10 +49,8 @@ void registerFunctionBase64Decode(FunctionFactory &);
void registerFunctionTryBase64Decode(FunctionFactory &);
#endif
#if USE_BASEX
void registerFunctionBase58Encode(FunctionFactory &);
void registerFunctionBase58Decode(FunctionFactory &);
#endif
#if USE_NLP
void registerFunctionStem(FunctionFactory &);
@ -110,10 +108,8 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionTryBase64Decode(factory);
#endif
#if USE_BASEX
registerFunctionBase58Encode(factory);
registerFunctionBase58Decode(factory);
#endif
#if USE_NLP
registerFunctionStem(factory);

View File

@ -1124,7 +1124,7 @@ template <class Traits>
class TupleOrArrayFunction : public IFunction
{
public:
static inline String name = Traits::name;
static constexpr auto name = Traits::name;
explicit TupleOrArrayFunction(ContextPtr context_)
: IFunction()
@ -1173,7 +1173,7 @@ extern FunctionPtr createFunctionArrayCosineDistance(ContextPtr context_);
struct L1NormTraits
{
static inline String name = "L1Norm";
static constexpr auto name = "L1Norm";
static constexpr auto CreateTupleFunction = FunctionL1Norm::create;
static constexpr auto CreateArrayFunction = createFunctionArrayL1Norm;
@ -1181,7 +1181,7 @@ struct L1NormTraits
struct L2NormTraits
{
static inline String name = "L2Norm";
static constexpr auto name = "L2Norm";
static constexpr auto CreateTupleFunction = FunctionL2Norm::create;
static constexpr auto CreateArrayFunction = createFunctionArrayL2Norm;
@ -1189,7 +1189,7 @@ struct L2NormTraits
struct L2SquaredNormTraits
{
static inline String name = "L2SquaredNorm";
static constexpr auto name = "L2SquaredNorm";
static constexpr auto CreateTupleFunction = FunctionL2SquaredNorm::create;
static constexpr auto CreateArrayFunction = createFunctionArrayL2SquaredNorm;
@ -1197,7 +1197,7 @@ struct L2SquaredNormTraits
struct LpNormTraits
{
static inline String name = "LpNorm";
static constexpr auto name = "LpNorm";
static constexpr auto CreateTupleFunction = FunctionLpNorm::create;
static constexpr auto CreateArrayFunction = createFunctionArrayLpNorm;
@ -1205,7 +1205,7 @@ struct LpNormTraits
struct LinfNormTraits
{
static inline String name = "LinfNorm";
static constexpr auto name = "LinfNorm";
static constexpr auto CreateTupleFunction = FunctionLinfNorm::create;
static constexpr auto CreateArrayFunction = createFunctionArrayLinfNorm;
@ -1213,7 +1213,7 @@ struct LinfNormTraits
struct L1DistanceTraits
{
static inline String name = "L1Distance";
static constexpr auto name = "L1Distance";
static constexpr auto CreateTupleFunction = FunctionL1Distance::create;
static constexpr auto CreateArrayFunction = createFunctionArrayL1Distance;
@ -1221,7 +1221,7 @@ struct L1DistanceTraits
struct L2DistanceTraits
{
static inline String name = "L2Distance";
static constexpr auto name = "L2Distance";
static constexpr auto CreateTupleFunction = FunctionL2Distance::create;
static constexpr auto CreateArrayFunction = createFunctionArrayL2Distance;
@ -1229,7 +1229,7 @@ struct L2DistanceTraits
struct L2SquaredDistanceTraits
{
static inline String name = "L2SquaredDistance";
static constexpr auto name = "L2SquaredDistance";
static constexpr auto CreateTupleFunction = FunctionL2SquaredDistance::create;
static constexpr auto CreateArrayFunction = createFunctionArrayL2SquaredDistance;
@ -1237,7 +1237,7 @@ struct L2SquaredDistanceTraits
struct LpDistanceTraits
{
static inline String name = "LpDistance";
static constexpr auto name = "LpDistance";
static constexpr auto CreateTupleFunction = FunctionLpDistance::create;
static constexpr auto CreateArrayFunction = createFunctionArrayLpDistance;
@ -1245,7 +1245,7 @@ struct LpDistanceTraits
struct LinfDistanceTraits
{
static inline String name = "LinfDistance";
static constexpr auto name = "LinfDistance";
static constexpr auto CreateTupleFunction = FunctionLinfDistance::create;
static constexpr auto CreateArrayFunction = createFunctionArrayLinfDistance;
@ -1253,7 +1253,7 @@ struct LinfDistanceTraits
struct CosineDistanceTraits
{
static inline String name = "cosineDistance";
static constexpr auto name = "cosineDistance";
static constexpr auto CreateTupleFunction = FunctionCosineDistance::create;
static constexpr auto CreateArrayFunction = createFunctionArrayCosineDistance;

View File

@ -90,7 +90,10 @@ bool AsynchronousReadBufferFromFileDescriptor::nextImpl()
prefetch_future = {};
file_offset_of_buffer_end += size;
if (size)
assert(offset <= size);
size_t bytes_read = size - offset;
if (bytes_read)
{
prefetch_buffer.swap(memory);
/// Adjust the working buffer so that it ignores `offset` bytes.
@ -109,7 +112,10 @@ bool AsynchronousReadBufferFromFileDescriptor::nextImpl()
auto [size, offset] = asyncReadInto(memory.data(), memory.size()).get();
file_offset_of_buffer_end += size;
if (size)
assert(offset <= size);
size_t bytes_read = size - offset;
if (bytes_read)
{
/// Adjust the working buffer so that it ignores `offset` bytes.
internal_buffer = Buffer(memory.data(), memory.data() + memory.size());

Some files were not shown because too many files have changed in this diff.