diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml
index d69168b01ee..1df8cea4ad5 100644
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@@ -3,6 +3,9 @@ name: BackportPR
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
+ # Export system tables to ClickHouse Cloud
+ CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }}
+ CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }}
on: # yamllint disable-line rule:truthy
push:
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 0fbcb95fc12..284d94b5c5a 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -3,6 +3,9 @@ name: MasterCI
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
+ # Export system tables to ClickHouse Cloud
+ CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }}
+ CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }}
on: # yamllint disable-line rule:truthy
push:
@@ -892,6 +895,48 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+ BuilderBinS390X:
+ needs: [DockerHubPush]
+ runs-on: [self-hosted, builder]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/build_check
+ IMAGES_PATH=${{runner.temp}}/images_path
+ REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+ CACHES_PATH=${{runner.temp}}/../ccaches
+ BUILD_NAME=binary_s390x
+ EOF
+ - name: Download changed images
+ uses: actions/download-artifact@v3
+ with:
+ name: changed_images
+ path: ${{ env.IMAGES_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ submodules: true
+ fetch-depth: 0 # otherwise we will have no info about contributors
+ - name: Build
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
+ - name: Upload build URLs to artifacts
+ if: ${{ success() || failure() }}
+ uses: actions/upload-artifact@v3
+ with:
+ name: ${{ env.BUILD_URLS }}
+ path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
@@ -975,6 +1020,7 @@ jobs:
- BuilderBinFreeBSD
- BuilderBinPPC64
- BuilderBinRISCV64
+ - BuilderBinS390X
- BuilderBinAmd64Compat
- BuilderBinAarch64V80Compat
- BuilderBinClangTidy
@@ -2870,6 +2916,216 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
+ IntegrationTestsAnalyzerAsan0:
+ needs: [BuilderDebAsan]
+ runs-on: [self-hosted, stress-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/integration_tests_asan
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Integration tests (asan, analyzer)
+ REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
+ RUN_BY_HASH_NUM=0
+ RUN_BY_HASH_TOTAL=6
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v3
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ - name: Integration test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 integration_test_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
+ IntegrationTestsAnalyzerAsan1:
+ needs: [BuilderDebAsan]
+ runs-on: [self-hosted, stress-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/integration_tests_asan
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Integration tests (asan, analyzer)
+ REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
+ RUN_BY_HASH_NUM=1
+ RUN_BY_HASH_TOTAL=6
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v3
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ - name: Integration test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 integration_test_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
+ IntegrationTestsAnalyzerAsan2:
+ needs: [BuilderDebAsan]
+ runs-on: [self-hosted, stress-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/integration_tests_asan
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Integration tests (asan, analyzer)
+ REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
+ RUN_BY_HASH_NUM=2
+ RUN_BY_HASH_TOTAL=6
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v3
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ - name: Integration test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 integration_test_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
+ IntegrationTestsAnalyzerAsan3:
+ needs: [BuilderDebAsan]
+ runs-on: [self-hosted, stress-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/integration_tests_asan
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Integration tests (asan, analyzer)
+ REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
+ RUN_BY_HASH_NUM=3
+ RUN_BY_HASH_TOTAL=6
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v3
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ - name: Integration test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 integration_test_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
+ IntegrationTestsAnalyzerAsan4:
+ needs: [BuilderDebAsan]
+ runs-on: [self-hosted, stress-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/integration_tests_asan
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Integration tests (asan, analyzer)
+ REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
+ RUN_BY_HASH_NUM=4
+ RUN_BY_HASH_TOTAL=6
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v3
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ - name: Integration test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 integration_test_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
+ IntegrationTestsAnalyzerAsan5:
+ needs: [BuilderDebAsan]
+ runs-on: [self-hosted, stress-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/integration_tests_asan
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Integration tests (asan, analyzer)
+ REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
+ RUN_BY_HASH_NUM=5
+ RUN_BY_HASH_TOTAL=6
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v3
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ - name: Integration test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 integration_test_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
IntegrationTestsTsan0:
needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester]
@@ -3433,7 +3689,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
- CHECK_NAME=Unit tests (release-clang)
+ CHECK_NAME=Unit tests (release)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@@ -3963,6 +4219,12 @@ jobs:
- IntegrationTestsAsan3
- IntegrationTestsAsan4
- IntegrationTestsAsan5
+ - IntegrationTestsAnalyzerAsan0
+ - IntegrationTestsAnalyzerAsan1
+ - IntegrationTestsAnalyzerAsan2
+ - IntegrationTestsAnalyzerAsan3
+ - IntegrationTestsAnalyzerAsan4
+ - IntegrationTestsAnalyzerAsan5
- IntegrationTestsRelease0
- IntegrationTestsRelease1
- IntegrationTestsRelease2
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index f898e764915..cf9f025585d 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -3,6 +3,9 @@ name: PullRequestCI
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
+ # Export system tables to ClickHouse Cloud
+ CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }}
+ CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }}
on: # yamllint disable-line rule:truthy
pull_request:
@@ -952,6 +955,47 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+ BuilderBinS390X:
+ needs: [DockerHubPush, FastTest, StyleCheck]
+ runs-on: [self-hosted, builder]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/build_check
+ IMAGES_PATH=${{runner.temp}}/images_path
+ REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+ CACHES_PATH=${{runner.temp}}/../ccaches
+ BUILD_NAME=binary_s390x
+ EOF
+ - name: Download changed images
+ uses: actions/download-artifact@v3
+ with:
+ name: changed_images
+ path: ${{ env.IMAGES_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ submodules: true
+ - name: Build
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
+ - name: Upload build URLs to artifacts
+ if: ${{ success() || failure() }}
+ uses: actions/upload-artifact@v3
+ with:
+ name: ${{ env.BUILD_URLS }}
+ path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
@@ -1034,6 +1078,7 @@ jobs:
- BuilderBinFreeBSD
- BuilderBinPPC64
- BuilderBinRISCV64
+ - BuilderBinS390X
- BuilderBinAmd64Compat
- BuilderBinAarch64V80Compat
- BuilderBinClangTidy
@@ -3903,6 +3948,216 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
+ IntegrationTestsAnalyzerAsan0:
+ needs: [BuilderDebAsan]
+ runs-on: [self-hosted, stress-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/integration_tests_asan
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Integration tests (asan, analyzer)
+ REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
+ RUN_BY_HASH_NUM=0
+ RUN_BY_HASH_TOTAL=6
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v3
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ - name: Integration test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 integration_test_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
+ IntegrationTestsAnalyzerAsan1:
+ needs: [BuilderDebAsan]
+ runs-on: [self-hosted, stress-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/integration_tests_asan
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Integration tests (asan, analyzer)
+ REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
+ RUN_BY_HASH_NUM=1
+ RUN_BY_HASH_TOTAL=6
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v3
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ - name: Integration test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 integration_test_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
+ IntegrationTestsAnalyzerAsan2:
+ needs: [BuilderDebAsan]
+ runs-on: [self-hosted, stress-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/integration_tests_asan
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Integration tests (asan, analyzer)
+ REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
+ RUN_BY_HASH_NUM=2
+ RUN_BY_HASH_TOTAL=6
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v3
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ - name: Integration test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 integration_test_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
+ IntegrationTestsAnalyzerAsan3:
+ needs: [BuilderDebAsan]
+ runs-on: [self-hosted, stress-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/integration_tests_asan
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Integration tests (asan, analyzer)
+ REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
+ RUN_BY_HASH_NUM=3
+ RUN_BY_HASH_TOTAL=6
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v3
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ - name: Integration test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 integration_test_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
+ IntegrationTestsAnalyzerAsan4:
+ needs: [BuilderDebAsan]
+ runs-on: [self-hosted, stress-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/integration_tests_asan
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Integration tests (asan, analyzer)
+ REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
+ RUN_BY_HASH_NUM=4
+ RUN_BY_HASH_TOTAL=6
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v3
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ - name: Integration test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 integration_test_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
+ IntegrationTestsAnalyzerAsan5:
+ needs: [BuilderDebAsan]
+ runs-on: [self-hosted, stress-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/integration_tests_asan
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=Integration tests (asan, analyzer)
+ REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
+ RUN_BY_HASH_NUM=5
+ RUN_BY_HASH_TOTAL=6
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v3
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ - name: Integration test
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 integration_test_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
IntegrationTestsTsan0:
needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester]
@@ -4331,7 +4586,7 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
- CHECK_NAME=Unit tests (release-clang)
+ CHECK_NAME=Unit tests (release)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
@@ -4889,6 +5144,12 @@ jobs:
- IntegrationTestsAsan3
- IntegrationTestsAsan4
- IntegrationTestsAsan5
+ - IntegrationTestsAnalyzerAsan0
+ - IntegrationTestsAnalyzerAsan1
+ - IntegrationTestsAnalyzerAsan2
+ - IntegrationTestsAnalyzerAsan3
+ - IntegrationTestsAnalyzerAsan4
+ - IntegrationTestsAnalyzerAsan5
- IntegrationTestsRelease0
- IntegrationTestsRelease1
- IntegrationTestsRelease2
@@ -4966,3 +5227,39 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
+##############################################################################################
+##################################### SQL TEST ###############################################
+##############################################################################################
+ SQLTest:
+ needs: [BuilderDebRelease]
+ runs-on: [self-hosted, fuzzer-unit-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/sqltest
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=SQLTest
+ REPO_COPY=${{runner.temp}}/sqltest/ClickHouse
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v3
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ clear-repository: true
+ - name: SQLTest
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 sqltest.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index 21284815583..e5527177aa5 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -3,6 +3,9 @@ name: ReleaseBranchCI
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
+ # Export system tables to ClickHouse Cloud
+ CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }}
+ CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }}
on: # yamllint disable-line rule:truthy
push:
diff --git a/.gitignore b/.gitignore
index a04c60d5ca3..5341f23a94f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -69,6 +69,7 @@ cmake-build-*
*.pyc
__pycache__
*.pytest_cache
+.mypy_cache
test.cpp
CPackConfig.cmake
@@ -161,8 +162,10 @@ tests/queries/0_stateless/test_*
tests/queries/0_stateless/*.binary
tests/queries/0_stateless/*.generated-expect
tests/queries/0_stateless/*.expect.history
+tests/integration/**/_gen
# rust
/rust/**/target
# It is autogenerated from *.in
/rust/**/.cargo/config.toml
+/rust/**/vendor
diff --git a/.gitmodules b/.gitmodules
index 151dc28c55b..86fd7832dd9 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -258,9 +258,6 @@
[submodule "contrib/wyhash"]
path = contrib/wyhash
url = https://github.com/wangyi-fudan/wyhash
-[submodule "contrib/hashidsxx"]
- path = contrib/hashidsxx
- url = https://github.com/schoentoon/hashidsxx
[submodule "contrib/nats-io"]
path = contrib/nats-io
url = https://github.com/ClickHouse/nats.c
@@ -334,6 +331,10 @@
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing
+[submodule "contrib/libarchive"]
+ path = contrib/libarchive
+ url = https://github.com/libarchive/libarchive.git
+ ignore = dirty
[submodule "contrib/libfiu"]
path = contrib/libfiu
url = https://github.com/ClickHouse/libfiu.git
@@ -343,3 +344,6 @@
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/c-ares/c-ares.git
+[submodule "contrib/incbin"]
+ path = contrib/incbin
+ url = https://github.com/graphitemaster/incbin.git
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bf6b309ef2c..ce57affb161 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,5 @@
### Table of Contents
+**[ClickHouse release v23.7, 2023-07-27](#237)**
**[ClickHouse release v23.6, 2023-06-30](#236)**
**[ClickHouse release v23.5, 2023-06-08](#235)**
**[ClickHouse release v23.4, 2023-04-26](#234)**
@@ -9,6 +10,181 @@
# 2023 Changelog
+### ClickHouse release 23.7, 2023-07-27
+
+#### Backward Incompatible Change
+* Add `NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (because a parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277). To grant it, use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user` (see the sketch after this list); to be able to give these grants, `named_collection_admin` is required in the config (previously it was named `named_collection_control`, so that name will remain as an alias). [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed a typo in the `system.parts` column name `last_removal_attemp_time`. Now it is named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)).
+* Bump the default `distributed_ddl_entry_format_version` to 5 (enables OpenTelemetry and `initial_query_id` pass-through). This prevents processing existing distributed DDL entries after a *downgrade* (but note that usually there should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)).
+* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting in case there was a table with an invalid projection. An example is a projection that created positional columns in PK (e.g. `projection p (select * order by 1, 4)` which is not allowed in table PK and can cause a crash during insert/merge). Drop such projections before the update. Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). [#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* The experimental feature `hashid` is removed due to a bug. The quality of the implementation was questionable at the start, and it never graduated from experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
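+
+A minimal sketch of the new grants from the `NAMED COLLECTION` entry above (the collection and user names are placeholders; the statements follow the forms quoted in that entry):
+
+```sql
+-- Allow a user to use one specific named collection.
+GRANT NAMED COLLECTION ON my_collection TO test_user;
+
+-- Or allow usage of every named collection.
+GRANT NAMED COLLECTION ON * TO test_user;
+```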
+
+#### New Feature
+* Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine provides read-only access to S3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows interacting with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)).
+* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)).
+* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)).
+* Kafka connector can fetch Avro schema from schema registry with basic authentication using url-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)).
+* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
+* Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)).
+* Implement support for encrypted elements in the configuration file. Added the possibility to use encrypted text in leaf elements of the configuration file. The text is encrypted using encryption codecs from the `<encryption_codecs>` section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)).
+* Grace Hash Join algorithm is now applicable to FULL and RIGHT JOINs. [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)).
+* Add `SYSTEM STOP LISTEN` query for more graceful termination. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972). [#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Add the `input_format_csv_allow_variable_number_of_columns` option. [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Another boring feature: add function `substring_index`, as in Spark or MySQL. [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)).
+* A system table `jemalloc_bins` to show stats for jemalloc bins. Example `SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10`. Enjoy. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)).
+* Add `RowBinaryWithDefaults` format with extra byte before each column as a flag for using the column's default value. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)).
+* Added `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)).
+* Added new `initcap` / `initcapUTF8` functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Create table now supports `PRIMARY KEY` syntax in column definition. Columns are added to primary index in the same order columns are defined. [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)).
+* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)).
+* Added Peak Memory Usage statistic to HTTP headers. [#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Added new `hasSubsequence` (+`CaseInsensitive` and `UTF8` versions) functions to match subsequences in strings. [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Add `array_agg` as alias of `groupArray` for PostgreSQL compatibility. Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)).
+* Add `any_value` as a compatibility alias for `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)).
+* Add aggregate function `array_concat_agg` for compatibility with BigQuery; it's an alias of `groupArrayArray`. Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)).
+* Add `OCTET_LENGTH` as an alias to `length`. Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). [#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
+* Added `firstLine` function to extract the first line from the multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)).
+* Implement KQL-style formatting for the `Interval` data type. This is only needed for compatibility with the `Kusto` query language. [#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)).
+* Added query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush queue of asynchronous inserts on graceful shutdown. Setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)).
+* Aliases `current_database` and a new function `current_schemas` for compatibility with PostgreSQL. [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)).
+* Add alias for functions `today` (now available under the `curdate`/`current_date` names) and `now` (`current_timestamp`). [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)).
+* Support `async_deduplication_token` for async insert. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)).
+* Add a new setting `disable_url_encoding` that allows disabling decoding/encoding of the path in the URI for the URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)).
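+
+Illustrative queries for a few of the new functions and queries listed above (`arrayJaccardIndex`, `hasSubsequence`, `initcap`, `substring_index`, `OCTET_LENGTH`, `firstLine`, `SYSTEM FLUSH ASYNC INSERT QUEUE`); the expected results in the comments are a sketch, not verified output:
+
+```sql
+SELECT arrayJaccardIndex([1, 2], [2, 3]);             -- 0.3333 = |intersection| / |union|
+SELECT hasSubsequence('garbage', 'arg');              -- 1: 'a', 'r', 'g' occur in this order
+SELECT initcap('hello clickhouse world');             -- 'Hello Clickhouse World'
+SELECT substring_index('www.clickhouse.com', '.', 2); -- 'www.clickhouse'
+SELECT OCTET_LENGTH('text');                          -- 4, an alias of length()
+SELECT firstLine('first line\nsecond line');          -- 'first line'
+
+-- Flush all pending asynchronous inserts to their destination tables.
+SYSTEM FLUSH ASYNC INSERT QUEUE;
+```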
+
+#### Performance Improvement
+* Enable automatic selection of the sparse serialization format by default. It improves performance. The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)).
+* Improve performance of some queries by tuning the allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)).
+* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool` as in `MergeTreeReadPool`. Also, from now on we use a connection pool for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)).
+* More pushdown to the right side of joins. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)).
+* Improve grace_hash join by reserving the hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)).
+* Waiting on lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)).
+* Move conditions with primary key columns to the end of PREWHERE chain. The idea is that conditions with PK columns are likely to be used in PK analysis and will not contribute much more to PREWHERE filtering. [#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)).
+* Speed up `COUNT(DISTINCT)` for String types by inlining SipHash. The performance experiments of *OnTime* on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of *11.6%* to the QPS of the query *Q8* while having no impact on others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
+* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It will save memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1`. This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823). This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173). [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)).
+* Reduce the number of syscalls in `FileCache::loadMetadata` - this speeds up server startup if the filesystem cache is configured. [#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)).
+* Allow a strict lower boundary for file segment size by downloading remaining data in the background. The minimum size of a file segment (if the actual file size is bigger) is configured by the cache setting `boundary_alignment`, by default `4Mi`. The number of background threads is configured by the cache setting `background_download_threads`, by default `2`. Also, `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)).
+* New setting `merge_tree_determine_task_size_by_prewhere_columns` added. If set to `true`, only the sizes of the columns from the `PREWHERE` section are considered when determining the reading task size. Otherwise, all columns from the query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)).
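+
+The opt-out mentioned in the sparse-serialization entry above, as a minimal sketch (the table and column names are placeholders; the setting name is the one quoted in that entry):
+
+```sql
+-- A ratio of 1 keeps the pre-23.7 behavior, i.e. effectively disables
+-- the sparse serialization format for this MergeTree table.
+CREATE TABLE t_no_sparse
+(
+    id UInt64,
+    value String
+)
+ENGINE = MergeTree
+ORDER BY id
+SETTINGS ratio_of_defaults_for_sparse_serialization = 1;
+```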
+
+#### Improvement
+* Use read_bytes/total_bytes_to_read for progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)).
+* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specifies the amount of time a replica will wait before closing the interserver handler for replicated sends. Also fix an inconsistency between the shutdown of tables and interserver handlers: now the server shuts down tables first and only after that shuts down interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)).
+* Allow SQL standard `FETCH` without `OFFSET`. See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Allow filtering HTTP headers for the URL/S3 table functions with the new `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix two issues in `geoHash` functions. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)).
+* Log async insert flush queries into `system.query_log`. [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)).
+* Functions `date_diff` and `age` now support millisecond/microsecond unit and work with microsecond precision. [#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Improve parsing of path in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)).
+* A third-party product depending on ClickHouse (Gluten: a Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. [#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)).
+* Add ability to disable native copy for S3 (setting for BACKUP/RESTORE `allow_s3_native_copy`, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). [#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)).
+* Add column `primary_key_size` to `system.parts` table to show compressed primary key size on disk. Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Allow running `clickhouse-local` without procfs, without home directory existing, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add placeholder `%a` for the full filename in the `rename_files_after_processing` setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add column `modification_time` into `system.parts_columns`. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)).
+* Add a new setting `input_format_csv_use_default_on_bad_values` to the CSV format that allows inserting a default value when parsing of a single field fails. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)).
+* Added flushing of the crash log to disk after an unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* Fix behavior on the dashboard page where errors unrelated to authentication were not shown. Also fix 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)).
+* Allow UUID to UInt128 conversion. [#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Added support for function `range` of Nullable arguments. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Convert conditions like `toYear(x) = c` to `c1 <= x < c2`. [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)).
+* Improve MySQL compatibility of the statement `SHOW INDEX`. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix `use_structure_from_insertion_table_in_table_functions` does not work with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)).
+* Cache dictionary now requests only unique keys from source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fixed the case when settings were not applied for EXPLAIN query when FORMAT was provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)).
+* Allow SETTINGS before FORMAT in DESCRIBE TABLE query for compatibility with SELECT query. Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). [#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Var-Int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range. 3rd party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)).
+* Update certificates when they change without the need to manually SYSTEM RELOAD CONFIG. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)).
+* Added the `allow_create_index_without_type` setting that allows ignoring `ADD INDEX` queries without a specified `TYPE`. Standard SQL queries will just succeed without changing the table schema. [#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)).
+* Log messages are written to the `system.text_log` table starting from server startup. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* In cases where the HTTP endpoint has multiple IP addresses and the first of them is unreachable, a timeout exception was thrown. Session creation now handles all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* Avro input format now supports Union even if it contains only a single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)).
+* Add setting `optimize_use_implicit_projections` to disable implicit projections (currently only `min_max_count` projection). [#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)).
+* It was possible to make the function `hasToken` run in an infinite loop. This possibility is now removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)).
+* Check whether S2 geo primitives are invalid as early as possible on ClickHouse side. This closes: [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). [#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183) . This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093). [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)).
+* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. [#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Wait for zero copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)).
+* Now the interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)).
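+
+A short sketch of the SQL-standard `FETCH` without `OFFSET` mentioned above (the table and columns are illustrative):
+
+```sql
+-- Row limiting without a preceding OFFSET clause.
+SELECT event_time, message
+FROM system.text_log
+ORDER BY event_time DESC
+FETCH FIRST 10 ROWS ONLY;
+```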
+
+#### Experimental Feature
+* Writing Parquet files is 10x faster; it is multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). This is controlled by the setting `output_format_parquet_use_custom_encoder`, which is disabled by default because the feature is not yet ideal (see the sketch after this list).
+* Added support for [PRQL](https://prql-lang.org/) as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Allow adding a disk name for custom disks. Previously, custom disks would use an internally generated disk name. Now it is possible with `disk = disk_<name>(...)` (e.g. the disk will have the name `name`). [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). This syntax can be changed in this release.
+* (experimental MaterializedMySQL) Fixed crash when `mysqlxx::Pool::Entry` is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)).
+* (experimental MaterializedMySQL) `CREATE TABLE ... AS SELECT` .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)).
+* (experimental MaterializedMySQL) Introduced automatic conversion of text types to utf8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)).
+* (experimental MaterializedMySQL) Now unquoted UTF-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)).
+* (experimental MaterializedMySQL) Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)).
+* Upgrade Intel QPL from v1.1.0 to v1.2.0, upgrade Intel accel-config from v3.5 to v4.0, and fix an issue where a Device IOTLB miss had a big performance impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)).
+* The `session_timezone` setting (new in version 23.6) is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). It is suspected that this feature is incomplete.
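+
+A minimal sketch of opting into the multi-threaded Parquet writer mentioned above (the file path and data are placeholders; the setting name is the one quoted in that entry):
+
+```sql
+SET output_format_parquet_use_custom_encoder = 1;
+
+-- Write a Parquet file with the new encoder.
+INSERT INTO FUNCTION file('/tmp/example.parquet', 'Parquet', 'n UInt64, s String')
+SELECT number AS n, toString(number) AS s
+FROM numbers(1000000);
+```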
+
+#### Build/Testing/Packaging Improvement
+* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add an integration test check with the Analyzer enabled. [#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)).
+* Reproducible builds for Rust. [#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)).
+* Update Cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)).
+* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` work with nullable arrays, which are not possible in ClickHouse but are needed for Gluten. [#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)).
+* We've updated the CCTZ library to master, but there are no user-visible changes. [#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Fix MaterializedPostgreSQL syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)).
+* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)).
+* Fix formatDateTime() with fractional negative datetime64 [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)).
+* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)).
+* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)).
+* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)).
+* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)).
+* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix segfault when creating an invalid EmbeddedRocksdb table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix async connect to hosts with multiple IPs [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)).
+* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)).
+* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)).
+* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect projection analysis when aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)).
+* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)).
+* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)).
+* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)).
+* MergeTree/ReplicatedMergeTree should use server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)).
+* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)).
+* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)).
+* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
+* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)).
+* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)).
+* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)).
+* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)).
+* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)).
+
+
### ClickHouse release 23.6, 2023-06-29
#### Backward Incompatible Change
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 45c3c422d7a..55bcf5fbf3c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -165,8 +165,14 @@ elseif(GLIBC_COMPATIBILITY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration")
endif ()
-# Make sure the final executable has symbols exported
-set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
+if (OS_LINUX)
+ # We should not export dynamic symbols, because:
+ # - The main clickhouse binary does not use dlopen,
+ # and whatever is injected into it via LD_PRELOAD should not link to our symbols.
+ # - The clickhouse-odbc-bridge and clickhouse-library-bridge binaries
+ # should not expose their symbols to ODBC drivers and libraries.
+ set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
+endif ()
if (OS_DARWIN)
# The `-all_load` flag forces loading of all symbols from all libraries,
@@ -202,9 +208,6 @@ option(OMIT_HEAVY_DEBUG_SYMBOLS
"Do not generate debugger info for heavy modules (ClickHouse functions and dictionaries, some contrib)"
${OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT})
-if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
- set(USE_DEBUG_HELPERS ON)
-endif()
option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS})
option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF)
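
To make the effect of dropping `-rdynamic` concrete, here is a hedged, standalone C++ probe (not part of this patch; the symbol name is made up): with exported dynamic symbols the executable's own functions are visible to `dlsym(RTLD_DEFAULT, ...)`, which is exactly what an `LD_PRELOAD`-ed library or an ODBC driver could latch onto; with `-Wl,--no-export-dynamic` the lookup returns nullptr.

```cpp
// Hypothetical probe. Build once with `-rdynamic` and once with
// `-Wl,--no-export-dynamic` to see the difference (link with -ldl on older glibc).
#include <dlfcn.h>
#include <cstdio>

extern "C" void clickhouse_probe_symbol() {}  // made-up symbol for illustration

int main()
{
    // With --export-dynamic the executable's symbols land in its dynamic symbol
    // table and can be found here; without it, the lookup fails.
    void * sym = dlsym(RTLD_DEFAULT, "clickhouse_probe_symbol");
    std::printf("clickhouse_probe_symbol %s visible to dlsym\n", sym ? "is" : "is not");
    return 0;
}
```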
diff --git a/README.md b/README.md
index 1036e1a97e1..4ff9b9caaa1 100644
--- a/README.md
+++ b/README.md
@@ -23,24 +23,21 @@ curl https://clickhouse.com/ | sh
## Upcoming Events
-* [**v23.6 Release Webinar**](https://clickhouse.com/company/events/v23-6-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-06) - Jun 29 - 23.6 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
-* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/293913596) - Jul 18
-* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/293913441) - Jul 19
-* [**ClickHouse Meetup in Toronto**](https://www.meetup.com/clickhouse-toronto-user-group/events/294183127) - Jul 20
-* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/294428050/) - Jul 27
+* [**v23.8 Community Call**](https://clickhouse.com/company/events/v23-8-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-08) - Aug 31 - 23.8 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
+* [**ClickHouse & AI - A Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/294472987) - Aug 8
* [**ClickHouse Meetup in Paris**](https://www.meetup.com/clickhouse-france-user-group/events/294283460) - Sep 12
Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com.
## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
-* **Recording available**: [**v23.4 Release Webinar**](https://www.youtube.com/watch?v=4rrf6bk_mOg) Faster Parquet Reading, Asynchonous Connections to Reoplicas, Trailing Comma before FROM, extractKeyValuePairs, integrations updates, and so much more! Watch it now!
+* **Recording available**: [**v23.6 Release Webinar**](https://www.youtube.com/watch?v=cuf_hYn7dqU) All the features of 23.6, one convenient video! Watch it now!
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)
- ## Interested in joining ClickHouse and making it your full time job?
+ ## Interested in joining ClickHouse and making it your full-time job?
-We are a globally diverse and distributed team, united behind a common goal of creating industry-leading, real-time analytics. Here, you will have an opportunity to solve some of the most cutting edge technical challenges and have direct ownership of your work and vision. If you are a contributor by nature, a thinker as well as a doer - we’ll definitely click!
+We are a globally diverse and distributed team, united behind a common goal of creating industry-leading, real-time analytics. Here, you will have an opportunity to solve some of the most cutting-edge technical challenges and have direct ownership of your work and vision. If you are a contributor by nature, a thinker and a doer - we’ll definitely click!
Check out our **current openings** here: https://clickhouse.com/company/careers
diff --git a/SECURITY.md b/SECURITY.md
index 4ba5f13d09c..d61533b44b9 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s
| Version | Supported |
|:-|:-|
+| 23.7 | ✔️ |
| 23.6 | ✔️ |
| 23.5 | ✔️ |
-| 23.4 | ✔️ |
+| 23.4 | ❌ |
| 23.3 | ✔️ |
| 23.2 | ❌ |
| 23.1 | ❌ |
diff --git a/base/base/EnumReflection.h b/base/base/EnumReflection.h
index 0d1f8ae0a40..e3208f16a75 100644
--- a/base/base/EnumReflection.h
+++ b/base/base/EnumReflection.h
@@ -3,6 +3,7 @@
#include
#include
+
template concept is_enum = std::is_enum_v;
namespace detail
diff --git a/base/base/JSON.cpp b/base/base/JSON.cpp
index 4c6d97b4444..0b43be38149 100644
--- a/base/base/JSON.cpp
+++ b/base/base/JSON.cpp
@@ -7,8 +7,6 @@
#include
#include
-#include
-
#define JSON_MAX_DEPTH 100
diff --git a/base/base/StringRef.h b/base/base/StringRef.h
index f428b7c747f..448bc102b41 100644
--- a/base/base/StringRef.h
+++ b/base/base/StringRef.h
@@ -8,6 +8,7 @@
#include
#include
+#include <base/defines.h>
#include
#include
@@ -274,6 +275,8 @@ struct CRC32Hash
if (size == 0)
return 0;
+ chassert(pos);
+
if (size < 8)
{
return static_cast(hashLessThan8(x.data, x.size));
diff --git a/base/base/defines.h b/base/base/defines.h
index 6abf8155b95..ee29ecf6118 100644
--- a/base/base/defines.h
+++ b/base/base/defines.h
@@ -115,8 +115,15 @@
/// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy)
#if !defined(chassert)
#if defined(ABORT_ON_LOGICAL_ERROR)
+ // clang-format off
+ #include <base/types.h>
+ namespace DB
+ {
+ void abortOnFailedAssertion(const String & description);
+ }
#define chassert(x) static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x)
#define UNREACHABLE() abort()
+ // clang-format on
#else
/// Here sizeof() trick is used to suppress unused warning for result,
/// since simple "(void)x" will evaluate the expression, while
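
A self-contained sketch of the pattern introduced above (simplified; the real code uses `DB::String` from the repository, replaced here with `std::string`, and a differently named macro to avoid clashing): the forward declaration lets the macro call the abort helper without dragging heavy headers into `defines.h`, and both branches of the conditional operator are `void`, so the macro works as a statement.

```cpp
// Simplified, standalone illustration of the chassert/abortOnFailedAssertion pattern.
#include <cstdio>
#include <cstdlib>
#include <string>

namespace DB { void abortOnFailedAssertion(const std::string & description); }

// Both branches are void, so `chassert_demo(cond);` is a valid statement.
#define chassert_demo(x) static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x)

namespace DB
{
    void abortOnFailedAssertion(const std::string & description)
    {
        std::fprintf(stderr, "Assertion failed: %s\n", description.c_str());
        std::abort();  // SIGABRT is easier to debug than SIGTRAP
    }
}

int main()
{
    chassert_demo(2 + 2 == 4);  // passes, expands to void(0)
    chassert_demo(false);       // prints "Assertion failed: false" and aborts
}
```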
diff --git a/base/base/find_symbols.h b/base/base/find_symbols.h
index 83232669c04..fda94edaa88 100644
--- a/base/base/find_symbols.h
+++ b/base/base/find_symbols.h
@@ -448,7 +448,7 @@ inline char * find_last_not_symbols_or_null(char * begin, char * end)
/// See https://github.com/boostorg/algorithm/issues/63
/// And https://bugs.llvm.org/show_bug.cgi?id=41141
template
-inline void splitInto(To & to, const std::string & what, bool token_compress = false)
+inline To & splitInto(To & to, std::string_view what, bool token_compress = false)
{
const char * pos = what.data();
const char * end = pos + what.size();
@@ -464,4 +464,6 @@ inline void splitInto(To & to, const std::string & what, bool token_compress = f
else
pos = delimiter_or_end;
}
+
+ return to;
}
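
The point of returning the container is that callers can split and use (or return) in one expression. The self-contained stand-in below shows the shape; the real `splitInto` in this header splits on its compile-time symbol list, which the stand-in replaces with a single runtime delimiter.

```cpp
// Stand-in sketch of the "fill the container and return it" pattern adopted above.
#include <string>
#include <string_view>
#include <vector>

template <typename To>
To & splitIntoDemo(To & to, std::string_view what, char delimiter = ',')
{
    size_t pos = 0;
    while (pos <= what.size())
    {
        size_t next = what.find(delimiter, pos);
        if (next == std::string_view::npos)
            next = what.size();
        to.emplace_back(what.substr(pos, next - pos));
        pos = next + 1;
    }
    return to;  // returning `to` lets callers chain or return the split result directly
}

int main()
{
    std::vector<std::string> hosts;
    return splitIntoDemo(hosts, "a,b,c").size() == 3 ? 0 : 1;
}
```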
diff --git a/base/base/move_extend.h b/base/base/move_extend.h
new file mode 100644
index 00000000000..6e5b16e037c
--- /dev/null
+++ b/base/base/move_extend.h
@@ -0,0 +1,9 @@
+#pragma once
+
+/// Extends @p to by moving elements from @p from to the end of @p to.
+/// @return Iterator into @p to pointing at the first of the moved elements.
+template <typename To, typename From>
+typename To::iterator moveExtend(To & to, From && from)
+{
+ return to.insert(to.end(), std::make_move_iterator(from.begin()), std::make_move_iterator(from.end()));
+}
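
A minimal usage sketch for the new helper, assuming the repository's usual `<base/...>` include path; the header itself declares no includes, so the caller brings the container types.

```cpp
// Example caller of moveExtend() from base/base/move_extend.h.
#include <cassert>
#include <string>
#include <vector>
#include <base/move_extend.h>

int main()
{
    std::vector<std::string> dst = {"a", "b"};
    std::vector<std::string> src = {"c", "d"};

    // Moves src's elements to the end of dst; the returned iterator points at
    // the first moved element ("c").
    auto first_moved = moveExtend(dst, std::move(src));

    assert(dst.size() == 4);
    assert(*first_moved == "c");
    return 0;
}
```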
diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h
index 411841e6d9f..fc4e9e551ca 100644
--- a/base/base/wide_integer_impl.h
+++ b/base/base/wide_integer_impl.h
@@ -12,7 +12,6 @@
#include
#include
-#include <boost/multiprecision/cpp_bin_float.hpp>
#include
// NOLINTBEGIN(*)
@@ -22,6 +21,7 @@
#define CONSTEXPR_FROM_DOUBLE constexpr
using FromDoubleIntermediateType = long double;
#else
+#include <boost/multiprecision/cpp_bin_float.hpp>
/// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended`
#define CONSTEXPR_FROM_DOUBLE
using FromDoubleIntermediateType = boost::multiprecision::cpp_bin_float_double_extended;
diff --git a/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp b/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp
index 4caf097c28a..1c5555f8cf3 100644
--- a/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp
+++ b/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp
@@ -19,7 +19,6 @@
#include "Poco/UTF16Encoding.h"
#include "Poco/Buffer.h"
#include "Poco/Exception.h"
-#include
using Poco::Buffer;
diff --git a/base/poco/Data/include/Poco/Data/TypeHandler.h b/base/poco/Data/include/Poco/Data/TypeHandler.h
index 34f88e986f7..e7633de7018 100644
--- a/base/poco/Data/include/Poco/Data/TypeHandler.h
+++ b/base/poco/Data/include/Poco/Data/TypeHandler.h
@@ -97,7 +97,7 @@ namespace Data
///
/// static void extract(std::size_t pos, Person& obj, const Person& defVal, AbstractExtractor::Ptr pExt)
/// {
- /// // defVal is the default person we should use if we encunter NULL entries, so we take the individual fields
+ /// // defVal is the default person we should use if we encounter NULL entries, so we take the individual fields
/// // as defaults. You can do more complex checking, ie return defVal if only one single entry of the fields is null etc...
/// poco_assert_dbg (!pExt.isNull());
/// std::string lastName;
diff --git a/base/poco/Foundation/include/Poco/Message.h b/base/poco/Foundation/include/Poco/Message.h
index e8f04888ab4..282c7fb5fd1 100644
--- a/base/poco/Foundation/include/Poco/Message.h
+++ b/base/poco/Foundation/include/Poco/Message.h
@@ -67,6 +67,8 @@ public:
Message(
const std::string & source, const std::string & text, Priority prio, const char * file, int line, std::string_view fmt_str = {});
+ Message(
+ std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str);
/// Creates a Message with the given source, text, priority,
/// source file path and line.
///
diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h
index 1880af4ccd2..30654504e0b 100644
--- a/base/poco/Foundation/include/Poco/URI.h
+++ b/base/poco/Foundation/include/Poco/URI.h
@@ -57,7 +57,7 @@ public:
URI();
/// Creates an empty URI.
- explicit URI(const std::string & uri);
+ explicit URI(const std::string & uri, bool enable_url_encoding = true);
/// Parses an URI from the given string. Throws a
/// SyntaxException if the uri is not valid.
@@ -350,6 +350,10 @@ protected:
static const std::string ILLEGAL;
private:
+ void encodePath(std::string & encodedStr) const;
+ void decodePath(const std::string & encodedStr);
+
+
std::string _scheme;
std::string _userInfo;
std::string _host;
@@ -357,6 +361,8 @@ private:
std::string _path;
std::string _query;
std::string _fragment;
+
+ bool _enable_url_encoding = true;
};
diff --git a/base/poco/Foundation/src/Message.cpp b/base/poco/Foundation/src/Message.cpp
index 663c96e47a2..54118cc0fc5 100644
--- a/base/poco/Foundation/src/Message.cpp
+++ b/base/poco/Foundation/src/Message.cpp
@@ -60,6 +60,19 @@ Message::Message(const std::string& source, const std::string& text, Priority pr
}
+Message::Message(std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str):
+ _source(std::move(source)),
+ _text(std::move(text)),
+ _prio(prio),
+ _tid(0),
+ _file(file),
+ _line(line),
+ _pMap(0),
+ _fmt_str(fmt_str)
+{
+ init();
+}
+
Message::Message(const Message& msg):
_source(msg._source),
_text(msg._text),
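
A hedged usage sketch of the new overload (the priority and texts are invented): callers that already own throwaway strings can hand them over instead of copying. Note that, unlike the const-reference constructor, this overload declares no default for `fmt_str`, so an empty view is passed explicitly.

```cpp
// Hypothetical use of the new rvalue-reference Poco::Message constructor.
#include <string>
#include <utility>
#include <Poco/Message.h>

Poco::Message makeLogMessage(std::string source, std::string text)
{
    // Both strings are moved into the Message, avoiding two copies.
    return Poco::Message(std::move(source), std::move(text),
                         Poco::Message::PRIO_INFORMATION,
                         __FILE__, __LINE__, /*fmt_str=*/{});
}
```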
diff --git a/base/poco/Foundation/src/Task.cpp b/base/poco/Foundation/src/Task.cpp
index a850ae37eff..4303d50d6e8 100644
--- a/base/poco/Foundation/src/Task.cpp
+++ b/base/poco/Foundation/src/Task.cpp
@@ -16,7 +16,6 @@
#include "Poco/TaskManager.h"
#include "Poco/Exception.h"
-#include
#include
diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp
index 5543e02b279..41e331bb080 100644
--- a/base/poco/Foundation/src/URI.cpp
+++ b/base/poco/Foundation/src/URI.cpp
@@ -36,8 +36,8 @@ URI::URI():
}
-URI::URI(const std::string& uri):
- _port(0)
+URI::URI(const std::string& uri, bool enable_url_encoding):
+ _port(0), _enable_url_encoding(enable_url_encoding)
{
parse(uri);
}
@@ -107,7 +107,8 @@ URI::URI(const URI& uri):
_port(uri._port),
_path(uri._path),
_query(uri._query),
- _fragment(uri._fragment)
+ _fragment(uri._fragment),
+ _enable_url_encoding(uri._enable_url_encoding)
{
}
@@ -119,7 +120,8 @@ URI::URI(const URI& baseURI, const std::string& relativeURI):
_port(baseURI._port),
_path(baseURI._path),
_query(baseURI._query),
- _fragment(baseURI._fragment)
+ _fragment(baseURI._fragment),
+ _enable_url_encoding(baseURI._enable_url_encoding)
{
resolve(relativeURI);
}
@@ -151,6 +153,7 @@ URI& URI::operator = (const URI& uri)
_path = uri._path;
_query = uri._query;
_fragment = uri._fragment;
+ _enable_url_encoding = uri._enable_url_encoding;
}
return *this;
}
@@ -181,6 +184,7 @@ void URI::swap(URI& uri)
std::swap(_path, uri._path);
std::swap(_query, uri._query);
std::swap(_fragment, uri._fragment);
+ std::swap(_enable_url_encoding, uri._enable_url_encoding);
}
@@ -201,7 +205,7 @@ std::string URI::toString() const
std::string uri;
if (isRelative())
{
- encode(_path, RESERVED_PATH, uri);
+ encodePath(uri);
}
else
{
@@ -217,7 +221,7 @@ std::string URI::toString() const
{
if (!auth.empty() && _path[0] != '/')
uri += '/';
- encode(_path, RESERVED_PATH, uri);
+ encodePath(uri);
}
else if (!_query.empty() || !_fragment.empty())
{
@@ -313,7 +317,7 @@ void URI::setAuthority(const std::string& authority)
void URI::setPath(const std::string& path)
{
_path.clear();
- decode(path, _path);
+ decodePath(path);
}
@@ -418,7 +422,7 @@ void URI::setPathEtc(const std::string& pathEtc)
std::string URI::getPathEtc() const
{
std::string pathEtc;
- encode(_path, RESERVED_PATH, pathEtc);
+ encodePath(pathEtc);
if (!_query.empty())
{
pathEtc += '?';
@@ -436,7 +440,7 @@ std::string URI::getPathEtc() const
std::string URI::getPathAndQuery() const
{
std::string pathAndQuery;
- encode(_path, RESERVED_PATH, pathAndQuery);
+ encodePath(pathAndQuery);
if (!_query.empty())
{
pathAndQuery += '?';
@@ -681,6 +685,21 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa
}
}
+void URI::encodePath(std::string & encodedStr) const
+{
+ if (_enable_url_encoding)
+ encode(_path, RESERVED_PATH, encodedStr);
+ else
+ encodedStr = _path;
+}
+
+void URI::decodePath(const std::string & encodedStr)
+{
+ if (_enable_url_encoding)
+ decode(encodedStr, _path);
+ else
+ _path = encodedStr;
+}
bool URI::isWellKnownPort() const
{
@@ -820,7 +839,7 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it
{
std::string path;
while (it != end && *it != '?' && *it != '#') path += *it++;
- decode(path, _path);
+ decodePath(path);
}
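
From the caller's side, the new flag decides whether the path goes through percent-decoding/encoding at all. An illustrative comparison (URL value invented), using the two-argument constructor added to `Poco/URI.h` above:

```cpp
// Illustrative only: default behaviour vs. enable_url_encoding = false.
#include <iostream>
#include <string>
#include <Poco/URI.h>

int main()
{
    const std::string raw = "https://example.com/a%2Fb/c";

    Poco::URI decoded(raw);                                   // path percent-decoded on parse
    Poco::URI verbatim(raw, /*enable_url_encoding=*/false);   // path kept exactly as given

    std::cout << decoded.getPath() << '\n';   // "/a/b/c"   (the %2F became '/')
    std::cout << verbatim.getPath() << '\n';  // "/a%2Fb/c" (stored verbatim)
    return 0;
}
```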
diff --git a/base/poco/JSON/src/Object.cpp b/base/poco/JSON/src/Object.cpp
index 7fca65c5b01..b041f570934 100644
--- a/base/poco/JSON/src/Object.cpp
+++ b/base/poco/JSON/src/Object.cpp
@@ -14,7 +14,6 @@
#include "Poco/JSON/Object.h"
#include
-#include
using Poco::Dynamic::Var;
diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h
index d495d662f75..167a06eb7ff 100644
--- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h
+++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h
@@ -306,7 +306,7 @@ namespace Net
DEFAULT_KEEP_ALIVE_TIMEOUT = 8
};
- void reconnect();
+ virtual void reconnect();
/// Connects the underlying socket to the HTTP server.
int write(const char * buffer, std::streamsize length);
diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp
index c5697b556d1..2712c0c452e 100644
--- a/base/poco/Net/src/HTTPClientSession.cpp
+++ b/base/poco/Net/src/HTTPClientSession.cpp
@@ -26,7 +26,6 @@
#include "Poco/CountingStream.h"
#include "Poco/RegularExpression.h"
#include
-#include
using Poco::NumberFormatter;
diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h
index 65917ac9dd4..c19eecf5c73 100644
--- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h
+++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h
@@ -146,7 +146,7 @@ namespace Net
std::string cipherList;
/// Specifies the supported ciphers in OpenSSL notation.
- /// Defaults to "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH".
+ /// Defaults to "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH".
std::string dhParamsFile;
/// Specifies a file containing Diffie-Hellman parameters.
@@ -172,7 +172,7 @@ namespace Net
VerificationMode verificationMode = VERIFY_RELAXED,
int verificationDepth = 9,
bool loadDefaultCAs = false,
- const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH");
+ const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH");
/// Creates a Context.
///
/// * usage specifies whether the context is used by a client or server.
@@ -200,7 +200,7 @@ namespace Net
VerificationMode verificationMode = VERIFY_RELAXED,
int verificationDepth = 9,
bool loadDefaultCAs = false,
- const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH");
+ const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH");
/// Creates a Context.
///
/// * usage specifies whether the context is used by a client or server.
diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h
index 21a1ed685e5..e4037c87927 100644
--- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h
+++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h
@@ -76,7 +76,7 @@ namespace Net
/// none|relaxed|strict|once
/// 1..9
/// true|false
- /// ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH
+ /// ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH
/// true|false
///
/// KeyFileHandler
diff --git a/base/poco/NetSSL_OpenSSL/src/Context.cpp b/base/poco/NetSSL_OpenSSL/src/Context.cpp
index ca220c40a33..d0bab902b89 100644
--- a/base/poco/NetSSL_OpenSSL/src/Context.cpp
+++ b/base/poco/NetSSL_OpenSSL/src/Context.cpp
@@ -41,7 +41,7 @@ Context::Params::Params():
verificationMode(VERIFY_RELAXED),
verificationDepth(9),
loadDefaultCAs(false),
- cipherList("ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH")
+ cipherList("ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH")
{
}
diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt
index 821b7b46855..9919d018046 100644
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
-SET(VERSION_REVISION 54476)
+SET(VERSION_REVISION 54477)
SET(VERSION_MAJOR 23)
-SET(VERSION_MINOR 7)
+SET(VERSION_MINOR 8)
SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH d1c7e13d08868cb04d3562dcced704dd577cb1df)
-SET(VERSION_DESCRIBE v23.7.1.1-testing)
-SET(VERSION_STRING 23.7.1.1)
+SET(VERSION_GITHASH a70127baecc451f1f7073bad7b6198f6703441d8)
+SET(VERSION_DESCRIBE v23.8.1.1-testing)
+SET(VERSION_STRING 23.8.1.1)
# end of autochange
diff --git a/cmake/embed_binary.cmake b/cmake/embed_binary.cmake
deleted file mode 100644
index e5428c24939..00000000000
--- a/cmake/embed_binary.cmake
+++ /dev/null
@@ -1,58 +0,0 @@
-# Embed a set of resource files into a resulting object file.
-#
-# Signature: `clickhouse_embed_binaries(TARGET RESOURCE_DIR RESOURCES ...)
-#
-# This will generate a static library target named ``, which contains the contents of
-# each `` file. The files should be located in ``. defaults to
-# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty.
-#
-# Each resource will result in three symbols in the final archive, based on the name ``.
-# These are:
-# 1. `_binary__start`: Points to the start of the binary data from ``.
-# 2. `_binary__end`: Points to the end of the binary data from ``.
-# 2. `_binary__size`: Points to the size of the binary data from ``.
-#
-# `` is a normalized name derived from ``, by replacing the characters "./-" with
-# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated
-# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`.
-macro(clickhouse_embed_binaries)
- set(one_value_args TARGET RESOURCE_DIR)
- set(resources RESOURCES)
- cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN})
-
- if (NOT DEFINED EMBED_TARGET)
- message(FATAL_ERROR "A target name must be provided for embedding binary resources into")
- endif()
-
- if (NOT DEFINED EMBED_RESOURCE_DIR)
- set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
- endif()
-
- list(LENGTH EMBED_RESOURCES N_RESOURCES)
- if (N_RESOURCES LESS 1)
- message(FATAL_ERROR "The list of binary resources to embed may not be empty")
- endif()
-
- add_library("${EMBED_TARGET}" STATIC)
- set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
-
- set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")
-
- foreach(RESOURCE_FILE ${EMBED_RESOURCES})
- set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
- set(BINARY_FILE_NAME "${RESOURCE_FILE}")
-
- # Normalize the name of the resource.
- string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
- string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")
-
- # Generate the configured assembly file in the output directory.
- configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)
-
- # Set the include directory for relative paths specified for `.incbin` directive.
- set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")
-
- target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")
- set_target_properties("${EMBED_TARGET}" PROPERTIES OBJECT_DEPENDS "${RESOURCE_FILE}")
- endforeach()
-endmacro()
diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake
index 3a33b3b9989..acc38b6fa2a 100644
--- a/cmake/limit_jobs.cmake
+++ b/cmake/limit_jobs.cmake
@@ -1,43 +1,38 @@
-# Usage:
-# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # In megabytes
-# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "")
-# include (cmake/limit_jobs.cmake)
+# Limit compiler/linker job concurrency to avoid OOMs on subtrees where compilation/linking is memory-intensive.
+#
+# Usage from CMake:
+# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # megabytes
+# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") # megabytes
+# include (cmake/limit_jobs.cmake)
+#
+# (bigger values mean fewer jobs)
-cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY) # Not available under freebsd
+cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY)
cmake_host_system_information(RESULT NUMBER_OF_LOGICAL_CORES QUERY NUMBER_OF_LOGICAL_CORES)
-# 1 if not set
-option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" "")
+# Set these options explicitly to disable the automatic job limiting below
+option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" OFF)
+option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" OFF)
-# 1 if not set
-option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" "")
-
-if (NOT PARALLEL_COMPILE_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY)
+if (NOT PARALLEL_COMPILE_JOBS AND MAX_COMPILER_MEMORY)
math(EXPR PARALLEL_COMPILE_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_COMPILER_MEMORY})
if (NOT PARALLEL_COMPILE_JOBS)
set (PARALLEL_COMPILE_JOBS 1)
endif ()
- if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
- set (PARALLEL_COMPILE_JOBS_LESS TRUE)
+ if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
+ message(WARNING "The auto-calculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.")
endif()
endif ()
-if (PARALLEL_COMPILE_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES))
- set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})
- string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE})
- set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS})
-endif ()
-
-
-if (NOT PARALLEL_LINK_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY)
+if (NOT PARALLEL_LINK_JOBS AND MAX_LINKER_MEMORY)
math(EXPR PARALLEL_LINK_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_LINKER_MEMORY})
if (NOT PARALLEL_LINK_JOBS)
set (PARALLEL_LINK_JOBS 1)
endif ()
- if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
- set (PARALLEL_LINK_JOBS_LESS TRUE)
+ if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
+ message(WARNING "The auto-calculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.")
endif()
endif ()
@@ -52,20 +47,16 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLE
set (PARALLEL_LINK_JOBS 2)
endif()
-if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES))
+message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB DRAM, 'OFF' means the native core count).")
+
+if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
+ set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})
+ string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE})
+ set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS})
+endif ()
+
+if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
set(CMAKE_JOB_POOL_LINK link_job_pool${CMAKE_CURRENT_SOURCE_DIR})
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_LINK ${CMAKE_JOB_POOL_LINK})
set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_LINK}=${PARALLEL_LINK_JOBS})
endif ()
-
-if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS)
- message(STATUS
- "${CMAKE_CURRENT_SOURCE_DIR}: Have ${TOTAL_PHYSICAL_MEMORY} megabytes of memory.
- Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)")
- if (PARALLEL_COMPILE_JOBS_LESS)
- message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.")
- endif()
- if (PARALLEL_LINK_JOBS_LESS)
- message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.")
- endif()
-endif ()
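
A worked example of the arithmetic above, with invented machine numbers: the job count is total memory divided by the per-job memory budget, clamped to at least 1, and a dedicated job pool is registered only when that count is below the logical core count.

```cpp
// Hypothetical numbers, mirroring the CMake math in limit_jobs.cmake.
#include <algorithm>
#include <cstdio>

int main()
{
    const long total_memory_mb = 131072;  // assume a 128 GB builder
    const long cores = 64;                // assume 64 logical cores

    const long compile_jobs = std::max(1L, total_memory_mb / 2000);  // MAX_COMPILER_MEMORY
    const long link_jobs    = std::max(1L, total_memory_mb / 3500);  // MAX_LINKER_MEMORY

    // 65 compile jobs >= 64 cores -> no compile pool, compilation runs at full width.
    // 37 link jobs    <  64 cores -> a link pool caps concurrent links at 37.
    std::printf("compile jobs: %ld, link jobs: %ld\n", compile_jobs, link_jobs);
    return 0;
}
```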
diff --git a/cmake/linux/toolchain-s390x.cmake b/cmake/linux/toolchain-s390x.cmake
index b85d4253b89..945eb9affa4 100644
--- a/cmake/linux/toolchain-s390x.cmake
+++ b/cmake/linux/toolchain-s390x.cmake
@@ -20,6 +20,9 @@ set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/s390x-linux-gnu/libc")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
+set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64")
+set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64")
+set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64")
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
diff --git a/cmake/split_debug_symbols.cmake b/cmake/split_debug_symbols.cmake
index a9c2158359a..d6821eb6c48 100644
--- a/cmake/split_debug_symbols.cmake
+++ b/cmake/split_debug_symbols.cmake
@@ -22,8 +22,9 @@ macro(clickhouse_split_debug_symbols)
# Splits debug symbols into separate file, leaves the binary untouched:
COMMAND "${OBJCOPY_PATH}" --only-keep-debug "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
- # Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check:
- COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
+ # Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check.
+ # Also, after we disabled the export of symbols for dynamic linking, we still need to keep a static symbol table for good stack traces.
+ COMMAND "${STRIP_PATH}" --strip-debug --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Associate stripped binary with debug symbols:
COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMENT "Stripping clickhouse binary" VERBATIM
diff --git a/cmake/target.cmake b/cmake/target.cmake
index ffab08f1103..204a67d4357 100644
--- a/cmake/target.cmake
+++ b/cmake/target.cmake
@@ -47,7 +47,7 @@ if (CMAKE_CROSSCOMPILING)
set (ENABLE_RUST OFF CACHE INTERNAL "")
elseif (ARCH_S390X)
set (ENABLE_GRPC OFF CACHE INTERNAL "")
- set (ENABLE_SENTRY OFF CACHE INTERNAL "")
+ set (ENABLE_RUST OFF CACHE INTERNAL "")
endif ()
elseif (OS_FREEBSD)
# FIXME: broken dependencies
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 2af468970f1..16135351cce 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -92,6 +92,7 @@ add_contrib (google-protobuf-cmake google-protobuf)
add_contrib (openldap-cmake openldap)
add_contrib (grpc-cmake grpc)
add_contrib (msgpack-c-cmake msgpack-c)
+add_contrib (libarchive-cmake libarchive)
add_contrib (corrosion-cmake corrosion)
@@ -164,14 +165,13 @@ add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (datasketches-cpp-cmake datasketches-cpp)
-add_contrib (hashidsxx-cmake hashidsxx)
+add_contrib (incbin-cmake incbin)
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP)
add_contrib (libstemmer-c-cmake libstemmer_c)
add_contrib (wordnet-blast-cmake wordnet-blast)
add_contrib (lemmagen-c-cmake lemmagen-c)
- add_contrib (nlp-data-cmake nlp-data)
add_contrib (cld2-cmake cld2)
endif()
diff --git a/contrib/NuRaft b/contrib/NuRaft
index 491eaf592d9..eb1572129c7 160000
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit 491eaf592d950e0e37accbe8b3f217e068c9fecf
+Subproject commit eb1572129c71beb2156dcdaadc3fb136954aed96
diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt
index 46b86cb4ddb..e3ea0381595 100644
--- a/contrib/arrow-cmake/CMakeLists.txt
+++ b/contrib/arrow-cmake/CMakeLists.txt
@@ -502,9 +502,10 @@ target_include_directories(_parquet SYSTEM BEFORE
"${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src"
"${CMAKE_CURRENT_SOURCE_DIR}/cpp/src")
target_link_libraries(_parquet
- PUBLIC _arrow
- PRIVATE
+ PUBLIC
+ _arrow
ch_contrib::thrift
+ PRIVATE
boost::headers_only
boost::regex
OpenSSL::Crypto OpenSSL::SSL)
diff --git a/contrib/cctz b/contrib/cctz
index 5e05432420f..8529bcef5cd 160000
--- a/contrib/cctz
+++ b/contrib/cctz
@@ -1 +1 @@
-Subproject commit 5e05432420f9692418e2e12aff09859e420b14a2
+Subproject commit 8529bcef5cd996b7c0f4d7475286b76b5d126c4c
diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt
index 10070fbd949..7161f743de1 100644
--- a/contrib/cctz-cmake/CMakeLists.txt
+++ b/contrib/cctz-cmake/CMakeLists.txt
@@ -1,4 +1,3 @@
-include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")
set (SRCS
@@ -23,12 +22,10 @@ if (OS_FREEBSD)
endif ()
# Related to time_zones table:
-# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build
-# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX
-# as the library that's built using embedded tzdata is also specific to OS_LINUX
-set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
+# TimeZones.generated.cpp is autogenerated each time during a build
+set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp")
# remove existing copies so that its generated fresh on each build.
-file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
+file(REMOVE ${TIMEZONES_FILE})
# get the list of timezones from tzdata shipped with cctz
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
@@ -36,28 +33,44 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
-set(TIMEZONE_RESOURCE_FILES)
-
# each file in that dir (except of tab and localtime) store the info about timezone
execute_process(COMMAND
bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -"
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE TIMEZONES)
-file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
-file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
+file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
+file(APPEND ${TIMEZONES_FILE} "#include <incbin.h>\n")
+
+set (COUNTER 1)
+foreach(TIMEZONE ${TIMEZONES})
+ file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TZDIR}/${TIMEZONE}\");\n")
+ MATH(EXPR COUNTER "${COUNTER}+1")
+endforeach(TIMEZONE)
+
+file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" )
foreach(TIMEZONE ${TIMEZONES})
- file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n")
- list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}")
+ file(APPEND ${TIMEZONES_FILE} " \"${TIMEZONE}\",\n")
+ MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)
-file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n")
-clickhouse_embed_binaries(
- TARGET tzdata
- RESOURCE_DIR "${TZDIR}"
- RESOURCES ${TIMEZONE_RESOURCE_FILES}
-)
-add_dependencies(_cctz tzdata)
-target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}")
+
+file(APPEND ${TIMEZONES_FILE} " nullptr\n};\n\n")
+
+file(APPEND ${TIMEZONES_FILE} "#include <string_view>\n\n")
+file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" )
+
+set (COUNTER 1)
+foreach(TIMEZONE ${TIMEZONES})
+ file(APPEND ${TIMEZONES_FILE} " if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast<const char *>(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n")
+ MATH(EXPR COUNTER "${COUNTER}+1")
+endforeach(TIMEZONE)
+
+file(APPEND ${TIMEZONES_FILE} " return {};\n")
+file(APPEND ${TIMEZONES_FILE} "}\n")
+
+add_library (tzdata ${TIMEZONES_FILE})
+target_link_libraries(tzdata ch_contrib::incbin)
+target_link_libraries(_cctz tzdata)
add_library(ch_contrib::cctz ALIAS _cctz)
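
For orientation, the `TimeZones.generated.cpp` produced by the loops above has roughly the following shape (abbreviated to two zones; the paths and the full zone list come from the cctz tzdata directory at configure time):

```cpp
// Rough, abbreviated sketch of the generated TimeZones.generated.cpp.
#include <incbin.h>

INCBIN(resource_timezone1, "<TZDIR>/Africa/Abidjan");
INCBIN(resource_timezone2, "<TZDIR>/Africa/Accra");

const char * auto_time_zones[] {
    "Africa/Abidjan",
    "Africa/Accra",
    nullptr
};

#include <string_view>

std::string_view getTimeZone(const char * name)
{
    if (std::string_view("Africa/Abidjan") == name)
        return { reinterpret_cast<const char *>(gresource_timezone1Data), gresource_timezone1Size };
    if (std::string_view("Africa/Accra") == name)
        return { reinterpret_cast<const char *>(gresource_timezone2Data), gresource_timezone2Size };
    return {};
}
```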
diff --git a/contrib/cityhash102/include/city.h b/contrib/cityhash102/include/city.h
index 87363d16444..c98eb7e3585 100644
--- a/contrib/cityhash102/include/city.h
+++ b/contrib/cityhash102/include/city.h
@@ -73,8 +73,8 @@ struct uint128
uint128() = default;
uint128(uint64 low64_, uint64 high64_) : low64(low64_), high64(high64_) {}
- friend bool operator ==(const uint128 & x, const uint128 & y) { return (x.low64 == y.low64) && (x.high64 == y.high64); }
- friend bool operator !=(const uint128 & x, const uint128 & y) { return !(x == y); }
+
+ friend auto operator<=>(const uint128 &, const uint128 &) = default;
};
inline uint64 Uint128Low64(const uint128 & x) { return x.low64; }
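
What the defaulted three-way comparison buys, in a compact sketch: equality stays member-wise as before, `!=` is synthesized, and ordered comparisons become available too. The ordering compares members in declaration order (`low64` first, then `high64`), so it is a consistent total order rather than the numeric order of a 128-bit value, which is fine for a hash pair.

```cpp
// Sketch of the behaviour of the defaulted operator<=> above.
#include <cassert>
#include <compare>

struct uint128_demo
{
    unsigned long long low64 = 0;
    unsigned long long high64 = 0;
    friend auto operator<=>(const uint128_demo &, const uint128_demo &) = default;
};

int main()
{
    uint128_demo a{1, 0}, b{1, 0}, c{2, 0};
    assert(a == b);  // operator== is implicitly defaulted alongside <=>
    assert(a != c);  // != synthesized from ==
    assert(a < c);   // ordering now available as well (low64 compared first)
    return 0;
}
```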
diff --git a/contrib/curl b/contrib/curl
index b0edf0b7dae..eb3b049df52 160000
--- a/contrib/curl
+++ b/contrib/curl
@@ -1 +1 @@
-Subproject commit b0edf0b7dae44d9e66f270a257cf654b35d5263d
+Subproject commit eb3b049df526bf125eda23218e680ce7fa9ec46c
diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt
index 70d9c2816dc..733865d5101 100644
--- a/contrib/curl-cmake/CMakeLists.txt
+++ b/contrib/curl-cmake/CMakeLists.txt
@@ -8,125 +8,122 @@ endif()
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/curl")
set (SRCS
- "${LIBRARY_DIR}/lib/fopen.c"
- "${LIBRARY_DIR}/lib/noproxy.c"
- "${LIBRARY_DIR}/lib/idn.c"
- "${LIBRARY_DIR}/lib/cfilters.c"
- "${LIBRARY_DIR}/lib/cf-socket.c"
+ "${LIBRARY_DIR}/lib/altsvc.c"
+ "${LIBRARY_DIR}/lib/amigaos.c"
+ "${LIBRARY_DIR}/lib/asyn-thread.c"
+ "${LIBRARY_DIR}/lib/base64.c"
+ "${LIBRARY_DIR}/lib/bufq.c"
+ "${LIBRARY_DIR}/lib/bufref.c"
+ "${LIBRARY_DIR}/lib/cf-h1-proxy.c"
"${LIBRARY_DIR}/lib/cf-haproxy.c"
"${LIBRARY_DIR}/lib/cf-https-connect.c"
- "${LIBRARY_DIR}/lib/file.c"
- "${LIBRARY_DIR}/lib/timeval.c"
- "${LIBRARY_DIR}/lib/base64.c"
- "${LIBRARY_DIR}/lib/hostip.c"
- "${LIBRARY_DIR}/lib/progress.c"
- "${LIBRARY_DIR}/lib/formdata.c"
- "${LIBRARY_DIR}/lib/cookie.c"
- "${LIBRARY_DIR}/lib/http.c"
- "${LIBRARY_DIR}/lib/sendf.c"
- "${LIBRARY_DIR}/lib/url.c"
- "${LIBRARY_DIR}/lib/dict.c"
- "${LIBRARY_DIR}/lib/if2ip.c"
- "${LIBRARY_DIR}/lib/speedcheck.c"
- "${LIBRARY_DIR}/lib/ldap.c"
- "${LIBRARY_DIR}/lib/version.c"
- "${LIBRARY_DIR}/lib/getenv.c"
- "${LIBRARY_DIR}/lib/escape.c"
- "${LIBRARY_DIR}/lib/mprintf.c"
- "${LIBRARY_DIR}/lib/telnet.c"
- "${LIBRARY_DIR}/lib/netrc.c"
- "${LIBRARY_DIR}/lib/getinfo.c"
- "${LIBRARY_DIR}/lib/transfer.c"
- "${LIBRARY_DIR}/lib/strcase.c"
- "${LIBRARY_DIR}/lib/easy.c"
- "${LIBRARY_DIR}/lib/curl_fnmatch.c"
- "${LIBRARY_DIR}/lib/curl_log.c"
- "${LIBRARY_DIR}/lib/fileinfo.c"
- "${LIBRARY_DIR}/lib/krb5.c"
- "${LIBRARY_DIR}/lib/memdebug.c"
- "${LIBRARY_DIR}/lib/http_chunks.c"
- "${LIBRARY_DIR}/lib/strtok.c"
+ "${LIBRARY_DIR}/lib/cf-socket.c"
+ "${LIBRARY_DIR}/lib/cfilters.c"
+ "${LIBRARY_DIR}/lib/conncache.c"
"${LIBRARY_DIR}/lib/connect.c"
- "${LIBRARY_DIR}/lib/llist.c"
- "${LIBRARY_DIR}/lib/hash.c"
- "${LIBRARY_DIR}/lib/multi.c"
"${LIBRARY_DIR}/lib/content_encoding.c"
- "${LIBRARY_DIR}/lib/share.c"
- "${LIBRARY_DIR}/lib/http_digest.c"
- "${LIBRARY_DIR}/lib/md4.c"
- "${LIBRARY_DIR}/lib/md5.c"
- "${LIBRARY_DIR}/lib/http_negotiate.c"
- "${LIBRARY_DIR}/lib/inet_pton.c"
- "${LIBRARY_DIR}/lib/strtoofft.c"
- "${LIBRARY_DIR}/lib/strerror.c"
- "${LIBRARY_DIR}/lib/amigaos.c"
+ "${LIBRARY_DIR}/lib/cookie.c"
+ "${LIBRARY_DIR}/lib/curl_addrinfo.c"
+ "${LIBRARY_DIR}/lib/curl_des.c"
+ "${LIBRARY_DIR}/lib/curl_endian.c"
+ "${LIBRARY_DIR}/lib/curl_fnmatch.c"
+ "${LIBRARY_DIR}/lib/curl_get_line.c"
+ "${LIBRARY_DIR}/lib/curl_gethostname.c"
+ "${LIBRARY_DIR}/lib/curl_gssapi.c"
+ "${LIBRARY_DIR}/lib/curl_memrchr.c"
+ "${LIBRARY_DIR}/lib/curl_multibyte.c"
+ "${LIBRARY_DIR}/lib/curl_ntlm_core.c"
+ "${LIBRARY_DIR}/lib/curl_ntlm_wb.c"
+ "${LIBRARY_DIR}/lib/curl_path.c"
+ "${LIBRARY_DIR}/lib/curl_range.c"
+ "${LIBRARY_DIR}/lib/curl_rtmp.c"
+ "${LIBRARY_DIR}/lib/curl_sasl.c"
+ "${LIBRARY_DIR}/lib/curl_sspi.c"
+ "${LIBRARY_DIR}/lib/curl_threads.c"
+ "${LIBRARY_DIR}/lib/curl_trc.c"
+ "${LIBRARY_DIR}/lib/dict.c"
+ "${LIBRARY_DIR}/lib/doh.c"
+ "${LIBRARY_DIR}/lib/dynbuf.c"
+ "${LIBRARY_DIR}/lib/dynhds.c"
+ "${LIBRARY_DIR}/lib/easy.c"
+ "${LIBRARY_DIR}/lib/escape.c"
+ "${LIBRARY_DIR}/lib/file.c"
+ "${LIBRARY_DIR}/lib/fileinfo.c"
+ "${LIBRARY_DIR}/lib/fopen.c"
+ "${LIBRARY_DIR}/lib/formdata.c"
+ "${LIBRARY_DIR}/lib/getenv.c"
+ "${LIBRARY_DIR}/lib/getinfo.c"
+ "${LIBRARY_DIR}/lib/gopher.c"
+ "${LIBRARY_DIR}/lib/hash.c"
+ "${LIBRARY_DIR}/lib/headers.c"
+ "${LIBRARY_DIR}/lib/hmac.c"
"${LIBRARY_DIR}/lib/hostasyn.c"
+ "${LIBRARY_DIR}/lib/hostip.c"
"${LIBRARY_DIR}/lib/hostip4.c"
"${LIBRARY_DIR}/lib/hostip6.c"
"${LIBRARY_DIR}/lib/hostsyn.c"
+ "${LIBRARY_DIR}/lib/hsts.c"
+ "${LIBRARY_DIR}/lib/http.c"
+ "${LIBRARY_DIR}/lib/http2.c"
+ "${LIBRARY_DIR}/lib/http_aws_sigv4.c"
+ "${LIBRARY_DIR}/lib/http_chunks.c"
+ "${LIBRARY_DIR}/lib/http_digest.c"
+ "${LIBRARY_DIR}/lib/http_negotiate.c"
+ "${LIBRARY_DIR}/lib/http_ntlm.c"
+ "${LIBRARY_DIR}/lib/http_proxy.c"
+ "${LIBRARY_DIR}/lib/idn.c"
+ "${LIBRARY_DIR}/lib/if2ip.c"
+ "${LIBRARY_DIR}/lib/imap.c"
"${LIBRARY_DIR}/lib/inet_ntop.c"
+ "${LIBRARY_DIR}/lib/inet_pton.c"
+ "${LIBRARY_DIR}/lib/krb5.c"
+ "${LIBRARY_DIR}/lib/ldap.c"
+ "${LIBRARY_DIR}/lib/llist.c"
+ "${LIBRARY_DIR}/lib/md4.c"
+ "${LIBRARY_DIR}/lib/md5.c"
+ "${LIBRARY_DIR}/lib/memdebug.c"
+ "${LIBRARY_DIR}/lib/mime.c"
+ "${LIBRARY_DIR}/lib/mprintf.c"
+ "${LIBRARY_DIR}/lib/mqtt.c"
+ "${LIBRARY_DIR}/lib/multi.c"
+ "${LIBRARY_DIR}/lib/netrc.c"
+ "${LIBRARY_DIR}/lib/nonblock.c"
+ "${LIBRARY_DIR}/lib/noproxy.c"
+ "${LIBRARY_DIR}/lib/openldap.c"
"${LIBRARY_DIR}/lib/parsedate.c"
+ "${LIBRARY_DIR}/lib/pingpong.c"
+ "${LIBRARY_DIR}/lib/pop3.c"
+ "${LIBRARY_DIR}/lib/progress.c"
+ "${LIBRARY_DIR}/lib/psl.c"
+ "${LIBRARY_DIR}/lib/rand.c"
+ "${LIBRARY_DIR}/lib/rename.c"
+ "${LIBRARY_DIR}/lib/rtsp.c"
"${LIBRARY_DIR}/lib/select.c"
- "${LIBRARY_DIR}/lib/splay.c"
- "${LIBRARY_DIR}/lib/strdup.c"
+ "${LIBRARY_DIR}/lib/sendf.c"
+ "${LIBRARY_DIR}/lib/setopt.c"
+ "${LIBRARY_DIR}/lib/sha256.c"
+ "${LIBRARY_DIR}/lib/share.c"
+ "${LIBRARY_DIR}/lib/slist.c"
+ "${LIBRARY_DIR}/lib/smb.c"
+ "${LIBRARY_DIR}/lib/smtp.c"
+ "${LIBRARY_DIR}/lib/socketpair.c"
"${LIBRARY_DIR}/lib/socks.c"
- "${LIBRARY_DIR}/lib/curl_addrinfo.c"
"${LIBRARY_DIR}/lib/socks_gssapi.c"
"${LIBRARY_DIR}/lib/socks_sspi.c"
- "${LIBRARY_DIR}/lib/curl_sspi.c"
- "${LIBRARY_DIR}/lib/slist.c"
- "${LIBRARY_DIR}/lib/nonblock.c"
- "${LIBRARY_DIR}/lib/curl_memrchr.c"
- "${LIBRARY_DIR}/lib/imap.c"
- "${LIBRARY_DIR}/lib/pop3.c"
- "${LIBRARY_DIR}/lib/smtp.c"
- "${LIBRARY_DIR}/lib/pingpong.c"
- "${LIBRARY_DIR}/lib/rtsp.c"
- "${LIBRARY_DIR}/lib/curl_threads.c"
- "${LIBRARY_DIR}/lib/warnless.c"
- "${LIBRARY_DIR}/lib/hmac.c"
- "${LIBRARY_DIR}/lib/curl_rtmp.c"
- "${LIBRARY_DIR}/lib/openldap.c"
- "${LIBRARY_DIR}/lib/curl_gethostname.c"
- "${LIBRARY_DIR}/lib/gopher.c"
- "${LIBRARY_DIR}/lib/http_proxy.c"
- "${LIBRARY_DIR}/lib/asyn-thread.c"
- "${LIBRARY_DIR}/lib/curl_gssapi.c"
- "${LIBRARY_DIR}/lib/http_ntlm.c"
- "${LIBRARY_DIR}/lib/curl_ntlm_wb.c"
- "${LIBRARY_DIR}/lib/curl_ntlm_core.c"
- "${LIBRARY_DIR}/lib/curl_sasl.c"
- "${LIBRARY_DIR}/lib/rand.c"
- "${LIBRARY_DIR}/lib/curl_multibyte.c"
- "${LIBRARY_DIR}/lib/conncache.c"
- "${LIBRARY_DIR}/lib/cf-h1-proxy.c"
- "${LIBRARY_DIR}/lib/http2.c"
- "${LIBRARY_DIR}/lib/smb.c"
- "${LIBRARY_DIR}/lib/curl_endian.c"
- "${LIBRARY_DIR}/lib/curl_des.c"
+ "${LIBRARY_DIR}/lib/speedcheck.c"
+ "${LIBRARY_DIR}/lib/splay.c"
+ "${LIBRARY_DIR}/lib/strcase.c"
+ "${LIBRARY_DIR}/lib/strdup.c"
+ "${LIBRARY_DIR}/lib/strerror.c"
+ "${LIBRARY_DIR}/lib/strtok.c"
+ "${LIBRARY_DIR}/lib/strtoofft.c"
"${LIBRARY_DIR}/lib/system_win32.c"
- "${LIBRARY_DIR}/lib/mime.c"
- "${LIBRARY_DIR}/lib/sha256.c"
- "${LIBRARY_DIR}/lib/setopt.c"
- "${LIBRARY_DIR}/lib/curl_path.c"
- "${LIBRARY_DIR}/lib/curl_range.c"
- "${LIBRARY_DIR}/lib/psl.c"
- "${LIBRARY_DIR}/lib/doh.c"
- "${LIBRARY_DIR}/lib/urlapi.c"
- "${LIBRARY_DIR}/lib/curl_get_line.c"
- "${LIBRARY_DIR}/lib/altsvc.c"
- "${LIBRARY_DIR}/lib/socketpair.c"
- "${LIBRARY_DIR}/lib/bufref.c"
- "${LIBRARY_DIR}/lib/bufq.c"
- "${LIBRARY_DIR}/lib/dynbuf.c"
- "${LIBRARY_DIR}/lib/dynhds.c"
- "${LIBRARY_DIR}/lib/hsts.c"
- "${LIBRARY_DIR}/lib/http_aws_sigv4.c"
- "${LIBRARY_DIR}/lib/mqtt.c"
- "${LIBRARY_DIR}/lib/rename.c"
- "${LIBRARY_DIR}/lib/headers.c"
+ "${LIBRARY_DIR}/lib/telnet.c"
"${LIBRARY_DIR}/lib/timediff.c"
- "${LIBRARY_DIR}/lib/vauth/vauth.c"
+ "${LIBRARY_DIR}/lib/timeval.c"
+ "${LIBRARY_DIR}/lib/transfer.c"
+ "${LIBRARY_DIR}/lib/url.c"
+ "${LIBRARY_DIR}/lib/urlapi.c"
"${LIBRARY_DIR}/lib/vauth/cleartext.c"
"${LIBRARY_DIR}/lib/vauth/cram.c"
"${LIBRARY_DIR}/lib/vauth/digest.c"
@@ -138,23 +135,24 @@ set (SRCS
"${LIBRARY_DIR}/lib/vauth/oauth2.c"
"${LIBRARY_DIR}/lib/vauth/spnego_gssapi.c"
"${LIBRARY_DIR}/lib/vauth/spnego_sspi.c"
+ "${LIBRARY_DIR}/lib/vauth/vauth.c"
+ "${LIBRARY_DIR}/lib/version.c"
"${LIBRARY_DIR}/lib/vquic/vquic.c"
- "${LIBRARY_DIR}/lib/vtls/openssl.c"
+ "${LIBRARY_DIR}/lib/vssh/libssh.c"
+ "${LIBRARY_DIR}/lib/vssh/libssh2.c"
+ "${LIBRARY_DIR}/lib/vtls/bearssl.c"
"${LIBRARY_DIR}/lib/vtls/gtls.c"
- "${LIBRARY_DIR}/lib/vtls/vtls.c"
- "${LIBRARY_DIR}/lib/vtls/nss.c"
- "${LIBRARY_DIR}/lib/vtls/wolfssl.c"
+ "${LIBRARY_DIR}/lib/vtls/hostcheck.c"
+ "${LIBRARY_DIR}/lib/vtls/keylog.c"
+ "${LIBRARY_DIR}/lib/vtls/mbedtls.c"
+ "${LIBRARY_DIR}/lib/vtls/openssl.c"
"${LIBRARY_DIR}/lib/vtls/schannel.c"
"${LIBRARY_DIR}/lib/vtls/schannel_verify.c"
"${LIBRARY_DIR}/lib/vtls/sectransp.c"
- "${LIBRARY_DIR}/lib/vtls/gskit.c"
- "${LIBRARY_DIR}/lib/vtls/mbedtls.c"
- "${LIBRARY_DIR}/lib/vtls/bearssl.c"
- "${LIBRARY_DIR}/lib/vtls/keylog.c"
+ "${LIBRARY_DIR}/lib/vtls/vtls.c"
+ "${LIBRARY_DIR}/lib/vtls/wolfssl.c"
"${LIBRARY_DIR}/lib/vtls/x509asn1.c"
- "${LIBRARY_DIR}/lib/vtls/hostcheck.c"
- "${LIBRARY_DIR}/lib/vssh/libssh2.c"
- "${LIBRARY_DIR}/lib/vssh/libssh.c"
+ "${LIBRARY_DIR}/lib/warnless.c"
)
add_library (_curl ${SRCS})
diff --git a/contrib/hashidsxx b/contrib/hashidsxx
deleted file mode 160000
index 783f6911ccf..00000000000
--- a/contrib/hashidsxx
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee
diff --git a/contrib/hashidsxx-cmake/CMakeLists.txt b/contrib/hashidsxx-cmake/CMakeLists.txt
deleted file mode 100644
index 17f3888bd94..00000000000
--- a/contrib/hashidsxx-cmake/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx")
-
-set (SRCS
- "${LIBRARY_DIR}/hashids.cpp"
-)
-
-set (HDRS
- "${LIBRARY_DIR}/hashids.h"
-)
-
-add_library(_hashidsxx ${SRCS} ${HDRS})
-target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}")
-
-add_library(ch_contrib::hashidsxx ALIAS _hashidsxx)
diff --git a/contrib/idxd-config b/contrib/idxd-config
index f6605c41a73..a836ce0e420 160000
--- a/contrib/idxd-config
+++ b/contrib/idxd-config
@@ -1 +1 @@
-Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99
+Subproject commit a836ce0e42052a69bffbbc14239ab4097f3b77f1
diff --git a/contrib/incbin b/contrib/incbin
new file mode 160000
index 00000000000..6e576cae5ab
--- /dev/null
+++ b/contrib/incbin
@@ -0,0 +1 @@
+Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf
diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..5778cf83c22
--- /dev/null
+++ b/contrib/incbin-cmake/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin")
+add_library(_incbin INTERFACE)
+target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR})
+add_library(ch_contrib::incbin ALIAS _incbin)
+
+# Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled.
+# Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
+target_compile_definitions(_incbin INTERFACE INCBIN_SILENCE_BITCODE_WARNING)
diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt
index 20025dfc63e..15e965ed841 100644
--- a/contrib/jemalloc-cmake/CMakeLists.txt
+++ b/contrib/jemalloc-cmake/CMakeLists.txt
@@ -1,5 +1,5 @@
if (SANITIZE OR NOT (
- ((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE OR ARCH_RISCV64)) OR
+ ((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE OR ARCH_RISCV64 OR ARCH_S390X)) OR
(OS_DARWIN AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG"))
))
if (ENABLE_JEMALLOC)
@@ -17,17 +17,17 @@ if (NOT ENABLE_JEMALLOC)
endif ()
if (NOT OS_LINUX)
- message (WARNING "jemalloc support on non-linux is EXPERIMENTAL")
+ message (WARNING "jemalloc support on non-Linux is EXPERIMENTAL")
endif()
if (OS_LINUX)
- # ThreadPool select job randomly, and there can be some threads that had been
- # performed some memory heavy task before and will be inactive for some time,
- # but until it will became active again, the memory will not be freed since by
- # default each thread has it's own arena, but there should be not more then
+ # ThreadPool selects jobs randomly, and there can be some threads that have
+ # performed memory-heavy tasks before and will be inactive for some time,
+ # but until such a thread becomes active again, the memory will not be freed since, by
+ # default, each thread has its own arena, but there should be no more than
# 4*CPU arenas (see opt.nareans description).
#
- # By enabling percpu_arena number of arenas limited to number of CPUs and hence
+ # By enabling percpu_arena, the number of arenas is limited to the number of CPUs, and hence
# this problem should go away.
#
# muzzy_decay_ms -- use MADV_FREE when available on newer Linuxes, to
@@ -38,7 +38,7 @@ if (OS_LINUX)
else()
set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000")
endif()
-# CACHE variable is empty, to allow changing defaults without necessity
+# CACHE variable is empty to allow changing defaults without the need
# to purge cache
set (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE "" CACHE STRING "Change default configuration string of JEMalloc" )
if (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE)
@@ -148,6 +148,8 @@ elseif (ARCH_PPC64LE)
set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_ppc64le")
elseif (ARCH_RISCV64)
set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_riscv64")
+elseif (ARCH_S390X)
+ set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_s390x")
else ()
message (FATAL_ERROR "internal jemalloc: This arch is not supported")
endif ()
@@ -172,7 +174,7 @@ target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1)
# jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++.
# The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`.
-# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing.
+# At the moment, ClickHouse uses LLVM libunwind, which follows libgcc's way of backtracing.
#
# ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1).
target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1)
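
For context, the string assembled into `JEMALLOC_CONFIG_MALLOC_CONF` becomes jemalloc's compiled-in default configuration. The same options can also be exercised at run time via the `MALLOC_CONF` environment variable or, when jemalloc is linked in, by defining the `malloc_conf` symbol, as in the hedged sketch below (the option string is copied from the non-Linux branch above):

```cpp
// Illustrative only: overriding jemalloc defaults from application code.
// jemalloc reads this application-provided symbol during its initialization.
extern "C" const char * malloc_conf = "oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000";

int main()
{
    return 0;  // any allocation after startup uses the configuration above
}
```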
diff --git a/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in
new file mode 100644
index 00000000000..531f2bca0c2
--- /dev/null
+++ b/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -0,0 +1,435 @@
+/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */
+#ifndef JEMALLOC_INTERNAL_DEFS_H_
+#define JEMALLOC_INTERNAL_DEFS_H_
+/*
+ * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
+ * public APIs to be prefixed. This makes it possible, with some care, to use
+ * multiple allocators simultaneously.
+ */
+/* #undef JEMALLOC_PREFIX */
+/* #undef JEMALLOC_CPREFIX */
+
+/*
+ * Define overrides for non-standard allocator-related functions if they are
+ * present on the system.
+ */
+#define JEMALLOC_OVERRIDE___LIBC_CALLOC
+#define JEMALLOC_OVERRIDE___LIBC_FREE
+#define JEMALLOC_OVERRIDE___LIBC_MALLOC
+#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN
+#define JEMALLOC_OVERRIDE___LIBC_REALLOC
+#define JEMALLOC_OVERRIDE___LIBC_VALLOC
+#define JEMALLOC_OVERRIDE___LIBC_PVALLOC
+/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */
+
+/*
+ * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
+ * For shared libraries, symbol visibility mechanisms prevent these symbols
+ * from being exported, but for static libraries, naming collisions are a real
+ * possibility.
+ */
+#define JEMALLOC_PRIVATE_NAMESPACE je_
+
+/*
+ * Hyper-threaded CPUs may need a special instruction inside spin loops in
+ * order to yield to another virtual CPU.
+ */
+#define CPU_SPINWAIT
+/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
+#define HAVE_CPU_SPINWAIT 0
+
+/*
+ * Number of significant bits in virtual addresses. This may be less than the
+ * total number of bits in a pointer, e.g. on x64, for which the uppermost 16
+ * bits are the same as bit 47.
+ */
+#define LG_VADDR 64
+
+/* Defined if C11 atomics are available. */
+#define JEMALLOC_C11_ATOMICS
+
+/* Defined if GCC __atomic atomics are available. */
+#define JEMALLOC_GCC_ATOMIC_ATOMICS
+/* and the 8-bit variant support. */
+#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS
+
+/* Defined if GCC __sync atomics are available. */
+#define JEMALLOC_GCC_SYNC_ATOMICS
+/* and the 8-bit variant support. */
+#define JEMALLOC_GCC_U8_SYNC_ATOMICS
+
+/*
+ * Defined if __builtin_clz() and __builtin_clzl() are available.
+ */
+#define JEMALLOC_HAVE_BUILTIN_CLZ
+
+/*
+ * Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
+ */
+/* #undef JEMALLOC_OS_UNFAIR_LOCK */
+
+/* Defined if syscall(2) is usable. */
+#define JEMALLOC_USE_SYSCALL
+
+/*
+ * Defined if secure_getenv(3) is available.
+ */
+#define JEMALLOC_HAVE_SECURE_GETENV
+
+/*
+ * Defined if issetugid(2) is available.
+ */
+/* #undef JEMALLOC_HAVE_ISSETUGID */
+
+/* Defined if pthread_atfork(3) is available. */
+#define JEMALLOC_HAVE_PTHREAD_ATFORK
+
+/* Defined if pthread_setname_np(3) is available. */
+#define JEMALLOC_HAVE_PTHREAD_SETNAME_NP
+
+/* Defined if pthread_getname_np(3) is available. */
+#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP
+
+/* Defined if pthread_get_name_np(3) is available. */
+/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
+ */
+#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
+ */
+#define JEMALLOC_HAVE_CLOCK_MONOTONIC
+
+/*
+ * Defined if mach_absolute_time() is available.
+ */
+/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */
+
+/*
+ * Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
+ */
+#define JEMALLOC_HAVE_CLOCK_REALTIME
+
+/*
+ * Defined if _malloc_thread_cleanup() exists. At least in the case of
+ * FreeBSD, pthread_key_create() allocates, which if used during malloc
+ * bootstrapping will cause recursion into the pthreads library. Therefore, if
+ * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
+ * malloc_tsd.
+ */
+/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */
+
+/*
+ * Defined if threaded initialization is known to be safe on this platform.
+ * Among other things, it must be possible to initialize a mutex without
+ * triggering allocation in order for threaded allocation to be safe.
+ */
+#define JEMALLOC_THREADED_INIT
+
+/*
+ * Defined if the pthreads implementation defines
+ * _pthread_mutex_init_calloc_cb(), in which case the function is used in order
+ * to avoid recursive allocation during mutex initialization.
+ */
+/* #undef JEMALLOC_MUTEX_INIT_CB */
+
+/* Non-empty if the tls_model attribute is supported. */
+#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
+
+/*
+ * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
+ * inline functions.
+ */
+/* #undef JEMALLOC_DEBUG */
+
+/* JEMALLOC_STATS enables statistics calculation. */
+#define JEMALLOC_STATS
+
+/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
+/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
+
+/* JEMALLOC_PROF enables allocation profiling. */
+/* #undef JEMALLOC_PROF */
+
+/* Use libunwind for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBUNWIND */
+
+/* Use libgcc for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBGCC */
+
+/* Use gcc intrinsics for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_GCC */
+
+/* JEMALLOC_PAGEID enabled page id */
+/* #undef JEMALLOC_PAGEID */
+
+/* JEMALLOC_HAVE_PRCTL checks prctl */
+#define JEMALLOC_HAVE_PRCTL
+
+/*
+ * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
+ * segment (DSS).
+ */
+#define JEMALLOC_DSS
+
+/* Support memory filling (junk/zero). */
+#define JEMALLOC_FILL
+
+/* Support utrace(2)-based tracing. */
+/* #undef JEMALLOC_UTRACE */
+
+/* Support utrace(2)-based tracing (label based signature). */
+/* #undef JEMALLOC_UTRACE_LABEL */
+
+/* Support optional abort() on OOM. */
+/* #undef JEMALLOC_XMALLOC */
+
+/* Support lazy locking (avoid locking unless a second thread is launched). */
+/* #undef JEMALLOC_LAZY_LOCK */
+
+/*
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
+ * classes).
+ */
+/* #undef LG_QUANTUM */
+
+/* One page is 2^LG_PAGE bytes. */
+#define LG_PAGE 12
+
+/* Maximum number of regions in a slab. */
+/* #undef CONFIG_LG_SLAB_MAXREGS */
+
+/*
+ * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the
+ * system does not explicitly support huge pages; system calls that require
+ * explicit huge page support are separately configured.
+ */
+#define LG_HUGEPAGE 20
+
+/*
+ * If defined, adjacent virtual memory mappings with identical attributes
+ * automatically coalesce, and they fragment when changes are made to subranges.
+ * This is the normal order of things for mmap()/munmap(), but on Windows
+ * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
+ * mappings do *not* coalesce/fragment.
+ */
+#define JEMALLOC_MAPS_COALESCE
+
+/*
+ * If defined, retain memory for later reuse by default rather than using e.g.
+ * munmap() to unmap freed extents. This is enabled on 64-bit Linux because
+ * common sequences of mmap()/munmap() calls will cause virtual memory map
+ * holes.
+ */
+#define JEMALLOC_RETAIN
+
+/* TLS is used to map arenas and magazine caches to threads. */
+#define JEMALLOC_TLS
+
+/*
+ * Used to mark unreachable code to quiet "end of non-void" compiler warnings.
+ * Don't use this directly; instead use unreachable() from util.h
+ */
+#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
+
+/*
+ * ffs*() functions to use for bitmapping. Don't use these directly; instead,
+ * use ffs_*() from util.h.
+ */
+#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
+#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
+#define JEMALLOC_INTERNAL_FFS __builtin_ffs
+
+/*
+ * popcount*() functions to use for bitmapping.
+ */
+#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
+#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
+
+/*
+ * If defined, explicitly attempt to more uniformly distribute large allocation
+ * pointer alignments across all cache indices.
+ */
+#define JEMALLOC_CACHE_OBLIVIOUS
+
+/*
+ * If defined, enable logging facilities. We make this a configure option to
+ * avoid taking extra branches everywhere.
+ */
+/* #undef JEMALLOC_LOG */
+
+/*
+ * If defined, use readlinkat() (instead of readlink()) to follow
+ * /etc/malloc_conf.
+ */
+/* #undef JEMALLOC_READLINKAT */
+
+/*
+ * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
+ */
+/* #undef JEMALLOC_ZONE */
+
+/*
+ * Methods for determining whether the OS overcommits.
+ * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
+ * /proc/sys/vm.overcommit_memory file.
+ * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
+ */
+/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */
+#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
+
+/* Defined if madvise(2) is available. */
+#define JEMALLOC_HAVE_MADVISE
+
+/*
+ * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
+ * arguments to madvise(2).
+ */
+#define JEMALLOC_HAVE_MADVISE_HUGE
+
+/*
+ * Methods for purging unused pages differ between operating systems.
+ *
+ * madvise(..., MADV_FREE) : This marks pages as being unused, such that they
+ * will be discarded rather than swapped out.
+ * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
+ * defined, this immediately discards pages,
+ * such that new pages will be demand-zeroed if
+ * the address region is later touched;
+ * otherwise this behaves similarly to
+ * MADV_FREE, though typically with higher
+ * system overhead.
+ */
+#define JEMALLOC_PURGE_MADVISE_FREE
+#define JEMALLOC_PURGE_MADVISE_DONTNEED
+#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
+
+/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
+/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
+
+/*
+ * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
+ */
+#define JEMALLOC_MADVISE_DONTDUMP
+
+/*
+ * Defined if MADV_[NO]CORE is supported as an argument to madvise.
+ */
+/* #undef JEMALLOC_MADVISE_NOCORE */
+
+/* Defined if mprotect(2) is available. */
+#define JEMALLOC_HAVE_MPROTECT
+
+/*
+ * Defined if transparent huge pages (THPs) are supported via the
+ * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
+ */
+/* #undef JEMALLOC_THP */
+
+/* Defined if posix_madvise is available. */
+/* #undef JEMALLOC_HAVE_POSIX_MADVISE */
+
+/*
+ * Method for purging unused pages using posix_madvise.
+ *
+ * posix_madvise(..., POSIX_MADV_DONTNEED)
+ */
+/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */
+/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */
+
+/*
+ * Defined if memcntl page admin call is supported
+ */
+/* #undef JEMALLOC_HAVE_MEMCNTL */
+
+/*
+ * Defined if malloc_size is supported
+ */
+/* #undef JEMALLOC_HAVE_MALLOC_SIZE */
+
+/* Define if operating system has alloca.h header. */
+#define JEMALLOC_HAS_ALLOCA_H
+
+/* C99 restrict keyword supported. */
+#define JEMALLOC_HAS_RESTRICT
+
+/* For use by hash code. */
+#define JEMALLOC_BIG_ENDIAN
+
+/* sizeof(int) == 2^LG_SIZEOF_INT. */
+#define LG_SIZEOF_INT 2
+
+/* sizeof(long) == 2^LG_SIZEOF_LONG. */
+#define LG_SIZEOF_LONG 3
+
+/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
+#define LG_SIZEOF_LONG_LONG 3
+
+/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
+#define LG_SIZEOF_INTMAX_T 3
+
+/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
+/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
+
+/* glibc memalign hook. */
+/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
+
+/* pthread support */
+#define JEMALLOC_HAVE_PTHREAD
+
+/* dlsym() support */
+#define JEMALLOC_HAVE_DLSYM
+
+/* Adaptive mutex support in pthreads. */
+#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
+
+/* GNU specific sched_getcpu support */
+#define JEMALLOC_HAVE_SCHED_GETCPU
+
+/* GNU specific sched_setaffinity support */
+#define JEMALLOC_HAVE_SCHED_SETAFFINITY
+
+/*
+ * If defined, all the features necessary for background threads are present.
+ */
+#define JEMALLOC_BACKGROUND_THREAD
+
+/*
+ * If defined, jemalloc symbols are not exported (doesn't work when
+ * JEMALLOC_PREFIX is not defined).
+ */
+/* #undef JEMALLOC_EXPORT */
+
+/* config.malloc_conf options string. */
+#define JEMALLOC_CONFIG_MALLOC_CONF ""
+
+/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
+#define JEMALLOC_IS_MALLOC
+
+/*
+ * Defined if strerror_r returns char * if _GNU_SOURCE is defined.
+ */
+#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
+
+/* Performs additional safety checks when defined. */
+/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
+
+/* Is C++ support being built? */
+#define JEMALLOC_ENABLE_CXX
+
+/* Performs additional size checks when defined. */
+/* #undef JEMALLOC_OPT_SIZE_CHECKS */
+
+/* Allows sampled junk and stash for checking use-after-free when defined. */
+/* #undef JEMALLOC_UAF_DETECTION */
+
+/* Darwin VM_MAKE_TAG support */
+/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */
+
+/* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". */
+#define JEMALLOC_ZERO_REALLOC_DEFAULT_FREE
+
+#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
diff --git a/contrib/libarchive b/contrib/libarchive
new file mode 160000
index 00000000000..ee457961713
--- /dev/null
+++ b/contrib/libarchive
@@ -0,0 +1 @@
+Subproject commit ee45796171324519f0c0bfd012018dd099296336
diff --git a/contrib/libarchive-cmake/CMakeLists.txt b/contrib/libarchive-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..fb64266185e
--- /dev/null
+++ b/contrib/libarchive-cmake/CMakeLists.txt
@@ -0,0 +1,172 @@
+set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libarchive")
+
+set(SRCS
+ "${LIBRARY_DIR}/libarchive/archive_acl.c"
+ "${LIBRARY_DIR}/libarchive/archive_blake2sp_ref.c"
+ "${LIBRARY_DIR}/libarchive/archive_blake2s_ref.c"
+ "${LIBRARY_DIR}/libarchive/archive_check_magic.c"
+ "${LIBRARY_DIR}/libarchive/archive_cmdline.c"
+ "${LIBRARY_DIR}/libarchive/archive_cryptor.c"
+ "${LIBRARY_DIR}/libarchive/archive_digest.c"
+ "${LIBRARY_DIR}/libarchive/archive_disk_acl_darwin.c"
+ "${LIBRARY_DIR}/libarchive/archive_disk_acl_freebsd.c"
+ "${LIBRARY_DIR}/libarchive/archive_disk_acl_linux.c"
+ "${LIBRARY_DIR}/libarchive/archive_disk_acl_sunos.c"
+ "${LIBRARY_DIR}/libarchive/archive_entry.c"
+ "${LIBRARY_DIR}/libarchive/archive_entry_copy_bhfi.c"
+ "${LIBRARY_DIR}/libarchive/archive_entry_copy_stat.c"
+ "${LIBRARY_DIR}/libarchive/archive_entry_link_resolver.c"
+ "${LIBRARY_DIR}/libarchive/archive_entry_sparse.c"
+ "${LIBRARY_DIR}/libarchive/archive_entry_stat.c"
+ "${LIBRARY_DIR}/libarchive/archive_entry_strmode.c"
+ "${LIBRARY_DIR}/libarchive/archive_entry_xattr.c"
+ "${LIBRARY_DIR}/libarchive/archive_getdate.c"
+ "${LIBRARY_DIR}/libarchive/archive_hmac.c"
+ "${LIBRARY_DIR}/libarchive/archive_match.c"
+ "${LIBRARY_DIR}/libarchive/archive_options.c"
+ "${LIBRARY_DIR}/libarchive/archive_pack_dev.c"
+ "${LIBRARY_DIR}/libarchive/archive_pathmatch.c"
+ "${LIBRARY_DIR}/libarchive/archive_ppmd7.c"
+ "${LIBRARY_DIR}/libarchive/archive_ppmd8.c"
+ "${LIBRARY_DIR}/libarchive/archive_random.c"
+ "${LIBRARY_DIR}/libarchive/archive_rb.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_add_passphrase.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_append_filter.c"
+ "${LIBRARY_DIR}/libarchive/archive_read.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_data_into_fd.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_disk_entry_from_file.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_disk_posix.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_disk_set_standard_lookup.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_disk_windows.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_extract2.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_extract.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_open_fd.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_open_file.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_open_filename.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_open_memory.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_set_format.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_set_options.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_all.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_by_code.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_bzip2.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_compress.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_grzip.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_gzip.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lrzip.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lz4.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lzop.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_none.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_program.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_rpm.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_uu.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_xz.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_filter_zstd.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_7zip.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_all.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_ar.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_by_code.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_cab.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_cpio.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_empty.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_iso9660.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_lha.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_mtree.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_rar5.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_rar.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_raw.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_tar.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_warc.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_xar.c"
+ "${LIBRARY_DIR}/libarchive/archive_read_support_format_zip.c"
+ "${LIBRARY_DIR}/libarchive/archive_string.c"
+ "${LIBRARY_DIR}/libarchive/archive_string_sprintf.c"
+ "${LIBRARY_DIR}/libarchive/archive_util.c"
+ "${LIBRARY_DIR}/libarchive/archive_version_details.c"
+ "${LIBRARY_DIR}/libarchive/archive_virtual.c"
+ "${LIBRARY_DIR}/libarchive/archive_windows.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_b64encode.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_by_name.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_bzip2.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_compress.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_grzip.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_gzip.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lrzip.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lz4.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lzop.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_none.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_program.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_uuencode.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_xz.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_add_filter_zstd.c"
+ "${LIBRARY_DIR}/libarchive/archive_write.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_disk_posix.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_disk_set_standard_lookup.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_disk_windows.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_open_fd.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_open_file.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_open_filename.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_open_memory.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_7zip.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_ar.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_by_name.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_binary.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_newc.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_odc.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_filter_by_ext.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_gnutar.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_iso9660.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_mtree.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_pax.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_raw.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_shar.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_ustar.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_v7tar.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_warc.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_xar.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_format_zip.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_options.c"
+ "${LIBRARY_DIR}/libarchive/archive_write_set_passphrase.c"
+ "${LIBRARY_DIR}/libarchive/filter_fork_posix.c"
+ "${LIBRARY_DIR}/libarchive/filter_fork_windows.c"
+ "${LIBRARY_DIR}/libarchive/xxhash.c"
+)
+
+add_library(_libarchive ${SRCS})
+target_include_directories(_libarchive PUBLIC
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ "${LIBRARY_DIR}/libarchive"
+)
+
+target_compile_definitions(_libarchive PUBLIC
+ HAVE_CONFIG_H
+)
+
+target_compile_options(_libarchive PRIVATE "-Wno-reserved-macro-identifier")
+
+if (TARGET ch_contrib::xz)
+ target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1)
+ target_link_libraries(_libarchive PRIVATE ch_contrib::xz)
+endif()
+
+if (TARGET ch_contrib::zlib)
+ target_compile_definitions(_libarchive PUBLIC HAVE_ZLIB_H=1)
+ target_link_libraries(_libarchive PRIVATE ch_contrib::zlib)
+endif()
+
+if (OS_LINUX)
+ target_compile_definitions(
+ _libarchive PUBLIC
+ MAJOR_IN_SYSMACROS=1
+ HAVE_LINUX_FS_H=1
+ HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=1
+ HAVE_LINUX_TYPES_H=1
+ HAVE_SYS_STATFS_H=1
+ HAVE_FUTIMESAT=1
+ HAVE_ICONV=1
+ )
+endif()
+
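+# A hypothetical consumer (illustration only, not part of this file): other
+# targets can link the bundled library through the alias defined below, e.g.
+#   target_link_libraries(clickhouse_something PRIVATE ch_contrib::libarchive)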
+add_library(ch_contrib::libarchive ALIAS _libarchive)
\ No newline at end of file
diff --git a/contrib/libarchive-cmake/config.h b/contrib/libarchive-cmake/config.h
new file mode 100644
index 00000000000..0b0cab47a52
--- /dev/null
+++ b/contrib/libarchive-cmake/config.h
@@ -0,0 +1,1391 @@
+/* config.h. Generated from build/cmake/config.h.in by cmake configure */
+#define __LIBARCHIVE_CONFIG_H_INCLUDED 1
+
+/*
+ * Ensure we have C99-style int64_t, etc, all defined.
+ */
+
+/* First, we need to know if the system has already defined them. */
+#define HAVE_INT16_T
+#define HAVE_INT32_T
+#define HAVE_INT64_T
+#define HAVE_INTMAX_T
+
+#define HAVE_UINT8_T
+#define HAVE_UINT16_T
+#define HAVE_UINT32_T
+#define HAVE_UINT64_T
+#define HAVE_UINTMAX_T
+
+/* We might have the types we want under other spellings. */
+/* #undef HAVE___INT64 */
+/* #undef HAVE_U_INT64_T */
+/* #undef HAVE_UNSIGNED___INT64 */
+
+/* The sizes of various standard integer types. */
+#define SIZEOF_SHORT 2
+#define SIZEOF_INT 4
+#define SIZEOF_LONG 8
+#define SIZEOF_LONG_LONG 8
+#define SIZEOF_UNSIGNED_SHORT 2
+#define SIZEOF_UNSIGNED 4
+#define SIZEOF_UNSIGNED_LONG 8
+#define SIZEOF_UNSIGNED_LONG_LONG 8
+
+/*
+ * If we lack int64_t, define it to the first of __int64, int, long, and long long
+ * that exists and is the right size.
+ */
+#if !defined(HAVE_INT64_T) && defined(HAVE___INT64)
+typedef __int64 int64_t;
+#define HAVE_INT64_T
+#endif
+
+#if !defined(HAVE_INT64_T) && SIZEOF_INT == 8
+typedef int int64_t;
+#define HAVE_INT64_T
+#endif
+
+#if !defined(HAVE_INT64_T) && SIZEOF_LONG == 8
+typedef long int64_t;
+#define HAVE_INT64_T
+#endif
+
+#if !defined(HAVE_INT64_T) && SIZEOF_LONG_LONG == 8
+typedef long long int64_t;
+#define HAVE_INT64_T
+#endif
+
+#if !defined(HAVE_INT64_T)
+#error No 64-bit integer type was found.
+#endif
+
+/*
+ * Similarly for int32_t
+ */
+#if !defined(HAVE_INT32_T) && SIZEOF_INT == 4
+typedef int int32_t;
+#define HAVE_INT32_T
+#endif
+
+#if !defined(HAVE_INT32_T) && SIZEOF_LONG == 4
+typedef long int32_t;
+#define HAVE_INT32_T
+#endif
+
+#if !defined(HAVE_INT32_T)
+#error No 32-bit integer type was found.
+#endif
+
+/*
+ * Similarly for int16_t
+ */
+#if !defined(HAVE_INT16_T) && SIZEOF_INT == 2
+typedef int int16_t;
+#define HAVE_INT16_T
+#endif
+
+#if !defined(HAVE_INT16_T) && SIZEOF_SHORT == 2
+typedef short int16_t;
+#define HAVE_INT16_T
+#endif
+
+#if !defined(HAVE_INT16_T)
+#error No 16-bit integer type was found.
+#endif
+
+/*
+ * Similarly for uint64_t
+ */
+#if !defined(HAVE_UINT64_T) && defined(HAVE_UNSIGNED___INT64)
+typedef unsigned __int64 uint64_t;
+#define HAVE_UINT64_T
+#endif
+
+#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED == 8
+typedef unsigned uint64_t;
+#define HAVE_UINT64_T
+#endif
+
+#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED_LONG == 8
+typedef unsigned long uint64_t;
+#define HAVE_UINT64_T
+#endif
+
+#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED_LONG_LONG == 8
+typedef unsigned long long uint64_t;
+#define HAVE_UINT64_T
+#endif
+
+#if !defined(HAVE_UINT64_T)
+#error No 64-bit unsigned integer type was found.
+#endif
+
+
+/*
+ * Similarly for uint32_t
+ */
+#if !defined(HAVE_UINT32_T) && SIZEOF_UNSIGNED == 4
+typedef unsigned uint32_t;
+#define HAVE_UINT32_T
+#endif
+
+#if !defined(HAVE_UINT32_T) && SIZEOF_UNSIGNED_LONG == 4
+typedef unsigned long uint32_t;
+#define HAVE_UINT32_T
+#endif
+
+#if !defined(HAVE_UINT32_T)
+#error No 32-bit unsigned integer type was found.
+#endif
+
+/*
+ * Similarly for uint16_t
+ */
+#if !defined(HAVE_UINT16_T) && SIZEOF_UNSIGNED == 2
+typedef unsigned uint16_t;
+#define HAVE_UINT16_T
+#endif
+
+#if !defined(HAVE_UINT16_T) && SIZEOF_UNSIGNED_SHORT == 2
+typedef unsigned short uint16_t;
+#define HAVE_UINT16_T
+#endif
+
+#if !defined(HAVE_UINT16_T)
+#error No 16-bit unsigned integer type was found.
+#endif
+
+/*
+ * Similarly for uint8_t
+ */
+#if !defined(HAVE_UINT8_T)
+typedef unsigned char uint8_t;
+#define HAVE_UINT8_T
+#endif
+
+#if !defined(HAVE_UINT8_T)
+#error No 8-bit unsigned integer type was found.
+#endif
+
+/* Define intmax_t and uintmax_t if they are not already defined. */
+#if !defined(HAVE_INTMAX_T)
+typedef int64_t intmax_t;
+#endif
+
+#if !defined(HAVE_UINTMAX_T)
+typedef uint64_t uintmax_t;
+#endif
+
+/* Define ZLIB_WINAPI if zlib was built on Visual Studio. */
+/* #undef ZLIB_WINAPI */
+
+/* Darwin ACL support */
+/* #undef ARCHIVE_ACL_DARWIN */
+
+/* FreeBSD ACL support */
+/* #undef ARCHIVE_ACL_FREEBSD */
+
+/* FreeBSD NFSv4 ACL support */
+/* #undef ARCHIVE_ACL_FREEBSD_NFS4 */
+
+/* Linux POSIX.1e ACL support via libacl */
+/* #undef ARCHIVE_ACL_LIBACL */
+
+/* Linux NFSv4 ACL support via librichacl */
+/* #undef ARCHIVE_ACL_LIBRICHACL */
+
+/* Solaris ACL support */
+/* #undef ARCHIVE_ACL_SUNOS */
+
+/* Solaris NFSv4 ACL support */
+/* #undef ARCHIVE_ACL_SUNOS_NFS4 */
+
+/* MD5 via ARCHIVE_CRYPTO_MD5_LIBC supported. */
+/* #undef ARCHIVE_CRYPTO_MD5_LIBC */
+
+/* MD5 via ARCHIVE_CRYPTO_MD5_LIBSYSTEM supported. */
+/* #undef ARCHIVE_CRYPTO_MD5_LIBSYSTEM */
+
+/* MD5 via ARCHIVE_CRYPTO_MD5_MBEDTLS supported. */
+/* #undef ARCHIVE_CRYPTO_MD5_MBEDTLS */
+
+/* MD5 via ARCHIVE_CRYPTO_MD5_NETTLE supported. */
+/* #undef ARCHIVE_CRYPTO_MD5_NETTLE */
+
+/* MD5 via ARCHIVE_CRYPTO_MD5_OPENSSL supported. */
+/* #undef ARCHIVE_CRYPTO_MD5_OPENSSL */
+
+/* MD5 via ARCHIVE_CRYPTO_MD5_WIN supported. */
+/* #undef ARCHIVE_CRYPTO_MD5_WIN */
+
+/* RMD160 via ARCHIVE_CRYPTO_RMD160_LIBC supported. */
+/* #undef ARCHIVE_CRYPTO_RMD160_LIBC */
+
+/* RMD160 via ARCHIVE_CRYPTO_RMD160_NETTLE supported. */
+/* #undef ARCHIVE_CRYPTO_RMD160_NETTLE */
+
+/* RMD160 via ARCHIVE_CRYPTO_RMD160_MBEDTLS supported. */
+/* #undef ARCHIVE_CRYPTO_RMD160_MBEDTLS */
+
+/* RMD160 via ARCHIVE_CRYPTO_RMD160_OPENSSL supported. */
+/* #undef ARCHIVE_CRYPTO_RMD160_OPENSSL */
+
+/* SHA1 via ARCHIVE_CRYPTO_SHA1_LIBC supported. */
+/* #undef ARCHIVE_CRYPTO_SHA1_LIBC */
+
+/* SHA1 via ARCHIVE_CRYPTO_SHA1_LIBSYSTEM supported. */
+/* #undef ARCHIVE_CRYPTO_SHA1_LIBSYSTEM */
+
+/* SHA1 via ARCHIVE_CRYPTO_SHA1_MBEDTLS supported. */
+/* #undef ARCHIVE_CRYPTO_SHA1_MBEDTLS */
+
+/* SHA1 via ARCHIVE_CRYPTO_SHA1_NETTLE supported. */
+/* #undef ARCHIVE_CRYPTO_SHA1_NETTLE */
+
+/* SHA1 via ARCHIVE_CRYPTO_SHA1_OPENSSL supported. */
+/* #undef ARCHIVE_CRYPTO_SHA1_OPENSSL */
+
+/* SHA1 via ARCHIVE_CRYPTO_SHA1_WIN supported. */
+/* #undef ARCHIVE_CRYPTO_SHA1_WIN */
+
+/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC supported. */
+/* #undef ARCHIVE_CRYPTO_SHA256_LIBC */
+
+/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC2 supported. */
+/* #undef ARCHIVE_CRYPTO_SHA256_LIBC2 */
+
+/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC3 supported. */
+/* #undef ARCHIVE_CRYPTO_SHA256_LIBC3 */
+
+/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBSYSTEM supported. */
+/* #undef ARCHIVE_CRYPTO_SHA256_LIBSYSTEM */
+
+/* SHA256 via ARCHIVE_CRYPTO_SHA256_MBEDTLS supported. */
+/* #undef ARCHIVE_CRYPTO_SHA256_MBEDTLS */
+
+/* SHA256 via ARCHIVE_CRYPTO_SHA256_NETTLE supported. */
+/* #undef ARCHIVE_CRYPTO_SHA256_NETTLE */
+
+/* SHA256 via ARCHIVE_CRYPTO_SHA256_OPENSSL supported. */
+/* #undef ARCHIVE_CRYPTO_SHA256_OPENSSL */
+
+/* SHA256 via ARCHIVE_CRYPTO_SHA256_WIN supported. */
+/* #undef ARCHIVE_CRYPTO_SHA256_WIN */
+
+/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC supported. */
+/* #undef ARCHIVE_CRYPTO_SHA384_LIBC */
+
+/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC2 supported. */
+/* #undef ARCHIVE_CRYPTO_SHA384_LIBC2 */
+
+/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC3 supported. */
+/* #undef ARCHIVE_CRYPTO_SHA384_LIBC3 */
+
+/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBSYSTEM supported. */
+/* #undef ARCHIVE_CRYPTO_SHA384_LIBSYSTEM */
+
+/* SHA384 via ARCHIVE_CRYPTO_SHA384_MBEDTLS supported. */
+/* #undef ARCHIVE_CRYPTO_SHA384_MBEDTLS */
+
+/* SHA384 via ARCHIVE_CRYPTO_SHA384_NETTLE supported. */
+/* #undef ARCHIVE_CRYPTO_SHA384_NETTLE */
+
+/* SHA384 via ARCHIVE_CRYPTO_SHA384_OPENSSL supported. */
+/* #undef ARCHIVE_CRYPTO_SHA384_OPENSSL */
+
+/* SHA384 via ARCHIVE_CRYPTO_SHA384_WIN supported. */
+/* #undef ARCHIVE_CRYPTO_SHA384_WIN */
+
+/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC supported. */
+/* #undef ARCHIVE_CRYPTO_SHA512_LIBC */
+
+/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC2 supported. */
+/* #undef ARCHIVE_CRYPTO_SHA512_LIBC2 */
+
+/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC3 supported. */
+/* #undef ARCHIVE_CRYPTO_SHA512_LIBC3 */
+
+/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBSYSTEM supported. */
+/* #undef ARCHIVE_CRYPTO_SHA512_LIBSYSTEM */
+
+/* SHA512 via ARCHIVE_CRYPTO_SHA512_MBEDTLS supported. */
+/* #undef ARCHIVE_CRYPTO_SHA512_MBEDTLS */
+
+/* SHA512 via ARCHIVE_CRYPTO_SHA512_NETTLE supported. */
+/* #undef ARCHIVE_CRYPTO_SHA512_NETTLE */
+
+/* SHA512 via ARCHIVE_CRYPTO_SHA512_OPENSSL supported. */
+/* #undef ARCHIVE_CRYPTO_SHA512_OPENSSL */
+
+/* SHA512 via ARCHIVE_CRYPTO_SHA512_WIN supported. */
+/* #undef ARCHIVE_CRYPTO_SHA512_WIN */
+
+/* AIX xattr support */
+/* #undef ARCHIVE_XATTR_AIX */
+
+/* Darwin xattr support */
+/* #undef ARCHIVE_XATTR_DARWIN */
+
+/* FreeBSD xattr support */
+/* #undef ARCHIVE_XATTR_FREEBSD */
+
+/* Linux xattr support */
+/* #undef ARCHIVE_XATTR_LINUX */
+
+/* Version number of bsdcpio */
+#define BSDCPIO_VERSION_STRING "3.7.0"
+
+/* Version number of bsdtar */
+#define BSDTAR_VERSION_STRING "3.7.0"
+
+/* Version number of bsdcat */
+#define BSDCAT_VERSION_STRING "3.7.0"
+
+/* Define to 1 if you have the `acl_create_entry' function. */
+/* #undef HAVE_ACL_CREATE_ENTRY */
+
+/* Define to 1 if you have the `acl_get_fd_np' function. */
+/* #undef HAVE_ACL_GET_FD_NP */
+
+/* Define to 1 if you have the `acl_get_link' function. */
+/* #undef HAVE_ACL_GET_LINK */
+
+/* Define to 1 if you have the `acl_get_link_np' function. */
+/* #undef HAVE_ACL_GET_LINK_NP */
+
+/* Define to 1 if you have the `acl_get_perm' function. */
+/* #undef HAVE_ACL_GET_PERM */
+
+/* Define to 1 if you have the `acl_get_perm_np' function. */
+/* #undef HAVE_ACL_GET_PERM_NP */
+
+/* Define to 1 if you have the `acl_init' function. */
+/* #undef HAVE_ACL_INIT */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_ACL_LIBACL_H */
+
+/* Define to 1 if the system has the type `acl_permset_t'. */
+/* #undef HAVE_ACL_PERMSET_T */
+
+/* Define to 1 if you have the `acl_set_fd' function. */
+/* #undef HAVE_ACL_SET_FD */
+
+/* Define to 1 if you have the `acl_set_fd_np' function. */
+/* #undef HAVE_ACL_SET_FD_NP */
+
+/* Define to 1 if you have the `acl_set_file' function. */
+/* #undef HAVE_ACL_SET_FILE */
+
+/* Define to 1 if you have the `arc4random_buf' function. */
+/* #undef HAVE_ARC4RANDOM_BUF */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_ATTR_XATTR_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_BCRYPT_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_BSDXML_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_BZLIB_H */
+
+/* Define to 1 if you have the `chflags' function. */
+/* #undef HAVE_CHFLAGS */
+
+/* Define to 1 if you have the `chown' function. */
+#define HAVE_CHOWN 1
+
+/* Define to 1 if you have the `chroot' function. */
+#define HAVE_CHROOT 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_COPYFILE_H */
+
+/* Define to 1 if you have the `ctime_r' function. */
+#define HAVE_CTIME_R 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_CTYPE_H 1
+
+/* Define to 1 if you have the `cygwin_conv_path' function. */
+/* #undef HAVE_CYGWIN_CONV_PATH */
+
+/* Define to 1 if you have the declaration of `ACE_GETACL', and to 0 if you
+ don't. */
+/* #undef HAVE_DECL_ACE_GETACL */
+
+/* Define to 1 if you have the declaration of `ACE_GETACLCNT', and to 0 if you
+ don't. */
+/* #undef HAVE_DECL_ACE_GETACLCNT */
+
+/* Define to 1 if you have the declaration of `ACE_SETACL', and to 0 if you
+ don't. */
+/* #undef HAVE_DECL_ACE_SETACL */
+
+/* Define to 1 if you have the declaration of `ACL_SYNCHRONIZE', and to 0 if
+ you don't. */
+/* #undef HAVE_DECL_ACL_SYNCHRONIZE */
+
+/* Define to 1 if you have the declaration of `ACL_TYPE_EXTENDED', and to 0 if
+ you don't. */
+/* #undef HAVE_DECL_ACL_TYPE_EXTENDED */
+
+/* Define to 1 if you have the declaration of `ACL_TYPE_NFS4', and to 0 if you
+ don't. */
+/* #undef HAVE_DECL_ACL_TYPE_NFS4 */
+
+/* Define to 1 if you have the declaration of `ACL_USER', and to 0 if you
+ don't. */
+/* #undef HAVE_DECL_ACL_USER */
+
+/* Define to 1 if you have the declaration of `INT32_MAX', and to 0 if you
+ don't. */
+#define HAVE_DECL_INT32_MAX 1
+
+/* Define to 1 if you have the declaration of `INT32_MIN', and to 0 if you
+ don't. */
+#define HAVE_DECL_INT32_MIN 1
+
+/* Define to 1 if you have the declaration of `INT64_MAX', and to 0 if you
+ don't. */
+#define HAVE_DECL_INT64_MAX 1
+
+/* Define to 1 if you have the declaration of `INT64_MIN', and to 0 if you
+ don't. */
+#define HAVE_DECL_INT64_MIN 1
+
+/* Define to 1 if you have the declaration of `INTMAX_MAX', and to 0 if you
+ don't. */
+#define HAVE_DECL_INTMAX_MAX 1
+
+/* Define to 1 if you have the declaration of `INTMAX_MIN', and to 0 if you
+ don't. */
+#define HAVE_DECL_INTMAX_MIN 1
+
+/* Define to 1 if you have the declaration of `SETACL', and to 0 if you don't.
+ */
+/* #undef HAVE_DECL_SETACL */
+
+/* Define to 1 if you have the declaration of `SIZE_MAX', and to 0 if you
+ don't. */
+#define HAVE_DECL_SIZE_MAX 1
+
+/* Define to 1 if you have the declaration of `SSIZE_MAX', and to 0 if you
+ don't. */
+#define HAVE_DECL_SSIZE_MAX 1
+
+/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you
+ don't. */
+#define HAVE_DECL_STRERROR_R 1
+
+/* Define to 1 if you have the declaration of `UINT32_MAX', and to 0 if you
+ don't. */
+#define HAVE_DECL_UINT32_MAX 1
+
+/* Define to 1 if you have the declaration of `UINT64_MAX', and to 0 if you
+ don't. */
+#define HAVE_DECL_UINT64_MAX 1
+
+/* Define to 1 if you have the declaration of `UINTMAX_MAX', and to 0 if you
+ don't. */
+#define HAVE_DECL_UINTMAX_MAX 1
+
+/* Define to 1 if you have the declaration of `XATTR_NOFOLLOW', and to 0 if
+ you don't. */
+/* #undef HAVE_DECL_XATTR_NOFOLLOW */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_DIRECT_H */
+
+/* Define to 1 if you have the header file, and it defines `DIR'.
+ */
+#define HAVE_DIRENT_H 1
+
+/* Define to 1 if you have the `dirfd' function. */
+#define HAVE_DIRFD 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */
+/* #undef HAVE_DOPRNT */
+
+/* Define to 1 if nl_langinfo supports D_MD_ORDER */
+/* #undef HAVE_D_MD_ORDER */
+
+/* A possible errno value for invalid file format errors */
+/* #undef HAVE_EFTYPE */
+
+/* A possible errno value for invalid file format errors */
+#define HAVE_EILSEQ 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_ERRNO_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_EXPAT_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_EXT2FS_EXT2_FS_H */
+
+/* Define to 1 if you have the `extattr_get_file' function. */
+/* #undef HAVE_EXTATTR_GET_FILE */
+
+/* Define to 1 if you have the `extattr_list_file' function. */
+/* #undef HAVE_EXTATTR_LIST_FILE */
+
+/* Define to 1 if you have the `extattr_set_fd' function. */
+/* #undef HAVE_EXTATTR_SET_FD */
+
+/* Define to 1 if you have the `extattr_set_file' function. */
+/* #undef HAVE_EXTATTR_SET_FILE */
+
+/* Define to 1 if EXTATTR_NAMESPACE_USER is defined in sys/extattr.h. */
+/* #undef HAVE_DECL_EXTATTR_NAMESPACE_USER */
+
+/* Define to 1 if you have the declaration of `GETACL', and to 0 if you don't.
+ */
+/* #undef HAVE_DECL_GETACL */
+
+/* Define to 1 if you have the declaration of `GETACLCNT', and to 0 if you
+ don't. */
+/* #undef HAVE_DECL_GETACLCNT */
+
+/* Define to 1 if you have the `fchdir' function. */
+#define HAVE_FCHDIR 1
+
+/* Define to 1 if you have the `fchflags' function. */
+/* #undef HAVE_FCHFLAGS */
+
+/* Define to 1 if you have the `fchmod' function. */
+#define HAVE_FCHMOD 1
+
+/* Define to 1 if you have the `fchown' function. */
+#define HAVE_FCHOWN 1
+
+/* Define to 1 if you have the `fcntl' function. */
+#define HAVE_FCNTL 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_FCNTL_H 1
+
+/* Define to 1 if you have the `fdopendir' function. */
+#define HAVE_FDOPENDIR 1
+
+/* Define to 1 if you have the `fgetea' function. */
+/* #undef HAVE_FGETEA */
+
+/* Define to 1 if you have the `fgetxattr' function. */
+/* #undef HAVE_FGETXATTR */
+
+/* Define to 1 if you have the `flistea' function. */
+/* #undef HAVE_FLISTEA */
+
+/* Define to 1 if you have the `flistxattr' function. */
+#define HAVE_FLISTXATTR 1
+
+/* Define to 1 if you have the `fnmatch' function. */
+#define HAVE_FNMATCH 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_FNMATCH_H 1
+
+/* Define to 1 if you have the `fork' function. */
+#define HAVE_FORK 1
+
+/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */
+#define HAVE_FSEEKO 1
+
+/* Define to 1 if you have the `fsetea' function. */
+/* #undef HAVE_FSETEA */
+
+/* Define to 1 if you have the `fsetxattr' function. */
+/* #undef HAVE_FSETXATTR */
+
+/* Define to 1 if you have the `fstat' function. */
+#define HAVE_FSTAT 1
+
+/* Define to 1 if you have the `fstatat' function. */
+#define HAVE_FSTATAT 1
+
+/* Define to 1 if you have the `fstatfs' function. */
+#define HAVE_FSTATFS 1
+
+/* Define to 1 if you have the `fstatvfs' function. */
+#define HAVE_FSTATVFS 1
+
+/* Define to 1 if you have the `ftruncate' function. */
+#define HAVE_FTRUNCATE 1
+
+/* Define to 1 if you have the `futimens' function. */
+#define HAVE_FUTIMENS 1
+
+/* Define to 1 if you have the `futimes' function. */
+#define HAVE_FUTIMES 1
+
+/* Define to 1 if you have the `futimesat' function. */
+/* #undef HAVE_FUTIMESAT */
+
+/* Define to 1 if you have the `getea' function. */
+/* #undef HAVE_GETEA */
+
+/* Define to 1 if you have the `geteuid' function. */
+#define HAVE_GETEUID 1
+
+/* Define to 1 if you have the `getgrgid_r' function. */
+#define HAVE_GETGRGID_R 1
+
+/* Define to 1 if you have the `getgrnam_r' function. */
+#define HAVE_GETGRNAM_R 1
+
+/* Define to 1 if platform uses `optreset` to reset `getopt` */
+#define HAVE_GETOPT_OPTRESET 1
+
+/* Define to 1 if you have the `getpid' function. */
+#define HAVE_GETPID 1
+
+/* Define to 1 if you have the `getpwnam_r' function. */
+#define HAVE_GETPWNAM_R 1
+
+/* Define to 1 if you have the `getpwuid_r' function. */
+#define HAVE_GETPWUID_R 1
+
+/* Define to 1 if you have the `getvfsbyname' function. */
+/* #undef HAVE_GETVFSBYNAME */
+
+/* Define to 1 if you have the `getxattr' function. */
+#define HAVE_GETXATTR 1
+
+/* Define to 1 if you have the `gmtime_r' function. */
+#define HAVE_GMTIME_R 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_GRP_H 1
+
+/* Define to 1 if you have the `iconv' function. */
+/* #undef HAVE_ICONV */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_ICONV_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_IO_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_LANGINFO_H 1
+
+/* Define to 1 if you have the `lchflags' function. */
+/* #undef HAVE_LCHFLAGS */
+
+/* Define to 1 if you have the `lchmod' function. */
+#define HAVE_LCHMOD 1
+
+/* Define to 1 if you have the `lchown' function. */
+#define HAVE_LCHOWN 1
+
+/* Define to 1 if you have the `lgetea' function. */
+/* #undef HAVE_LGETEA */
+
+/* Define to 1 if you have the `lgetxattr' function. */
+#define HAVE_LGETXATTR 1
+
+/* Define to 1 if you have the `acl' library (-lacl). */
+/* #undef HAVE_LIBACL */
+
+/* Define to 1 if you have the `attr' library (-lattr). */
+/* #undef HAVE_LIBATTR */
+
+/* Define to 1 if you have the `bsdxml' library (-lbsdxml). */
+/* #undef HAVE_LIBBSDXML */
+
+/* Define to 1 if you have the `bz2' library (-lbz2). */
+/* #undef HAVE_LIBBZ2 */
+
+/* Define to 1 if you have the `b2' library (-lb2). */
+/* #undef HAVE_LIBB2 */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_BLAKE2_H */
+
+/* Define to 1 if you have the `charset' library (-lcharset). */
+/* #undef HAVE_LIBCHARSET */
+
+/* Define to 1 if you have the `crypto' library (-lcrypto). */
+/* #undef HAVE_LIBCRYPTO */
+
+/* Define to 1 if you have the `expat' library (-lexpat). */
+/* #undef HAVE_LIBEXPAT */
+
+/* Define to 1 if you have the `gcc' library (-lgcc). */
+/* #undef HAVE_LIBGCC */
+
+/* Define to 1 if you have the `lz4' library (-llz4). */
+/* #undef HAVE_LIBLZ4 */
+
+/* Define to 1 if you have the `lzma' library (-llzma). */
+/* #undef HAVE_LIBLZMA */
+
+/* Define to 1 if you have the `lzmadec' library (-llzmadec). */
+/* #undef HAVE_LIBLZMADEC */
+
+/* Define to 1 if you have the `lzo2' library (-llzo2). */
+/* #undef HAVE_LIBLZO2 */
+
+/* Define to 1 if you have the `mbedcrypto' library (-lmbedcrypto). */
+/* #undef HAVE_LIBMBEDCRYPTO */
+
+/* Define to 1 if you have the `nettle' library (-lnettle). */
+/* #undef HAVE_LIBNETTLE */
+
+/* Define to 1 if you have the `pcre' library (-lpcre). */
+/* #undef HAVE_LIBPCRE */
+
+/* Define to 1 if you have the `pcreposix' library (-lpcreposix). */
+/* #undef HAVE_LIBPCREPOSIX */
+
+/* Define to 1 if you have the `xml2' library (-lxml2). */
+#define HAVE_LIBXML2 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LIBXML_XMLREADER_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LIBXML_XMLWRITER_H */
+
+/* Define to 1 if you have the `z' library (-lz). */
+/* #undef HAVE_LIBZ */
+
+/* Define to 1 if you have the `zstd' library (-lzstd). */
+/* #undef HAVE_LIBZSTD */
+
+/* Define to 1 if you have the `zstd' library (-lzstd) with compression
+ support. */
+/* #undef HAVE_LIBZSTD_COMPRESSOR */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_LIMITS_H 1
+
+/* Define to 1 if you have the `link' function. */
+#define HAVE_LINK 1
+
+/* Define to 1 if you have the `linkat' function. */
+#define HAVE_LINKAT 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LINUX_FIEMAP_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LINUX_FS_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LINUX_MAGIC_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LINUX_TYPES_H */
+
+/* Define to 1 if you have the `listea' function. */
+/* #undef HAVE_LISTEA */
+
+/* Define to 1 if you have the `listxattr' function. */
+#define HAVE_LISTXATTR 1
+
+/* Define to 1 if you have the `llistea' function. */
+/* #undef HAVE_LLISTEA */
+
+/* Define to 1 if you have the `llistxattr' function. */
+#define HAVE_LLISTXATTR 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LOCALCHARSET_H */
+
+/* Define to 1 if you have the `locale_charset' function. */
+/* #undef HAVE_LOCALE_CHARSET */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_LOCALE_H 1
+
+/* Define to 1 if you have the `localtime_r' function. */
+#define HAVE_LOCALTIME_R 1
+
+/* Define to 1 if the system has the type `long long int'. */
+/* #undef HAVE_LONG_LONG_INT */
+
+/* Define to 1 if you have the `lsetea' function. */
+/* #undef HAVE_LSETEA */
+
+/* Define to 1 if you have the `lsetxattr' function. */
+#define HAVE_LSETXATTR 1
+
+/* Define to 1 if you have the `lstat' function. */
+#define HAVE_LSTAT 1
+
+/* Define to 1 if `lstat' has the bug that it succeeds when given the
+ zero-length file name argument. */
+/* #undef HAVE_LSTAT_EMPTY_STRING_BUG */
+
+/* Define to 1 if you have the `lutimes' function. */
+#define HAVE_LUTIMES 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LZ4HC_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LZ4_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LZMADEC_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LZMA_H */
+
+/* Define to 1 if you have a working `lzma_stream_encoder_mt' function. */
+/* #undef HAVE_LZMA_STREAM_ENCODER_MT */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LZO_LZO1X_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LZO_LZOCONF_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_MBEDTLS_AES_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_MBEDTLS_MD_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_MBEDTLS_PKCS5_H */
+
+/* Define to 1 if you have the `mbrtowc' function. */
+/* #undef HAVE_MBRTOWC */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_MEMBERSHIP_H */
+
+/* Define to 1 if you have the `memmove' function. */
+#define HAVE_MEMMOVE 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the `mkdir' function. */
+#define HAVE_MKDIR 1
+
+/* Define to 1 if you have the `mkfifo' function. */
+#define HAVE_MKFIFO 1
+
+/* Define to 1 if you have the `mknod' function. */
+#define HAVE_MKNOD 1
+
+/* Define to 1 if you have the `mkstemp' function. */
+#define HAVE_MKSTEMP 1
+
+/* Define to 1 if you have the header file, and it defines `DIR'. */
+/* #undef HAVE_NDIR_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_NETTLE_AES_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_NETTLE_HMAC_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_NETTLE_MD5_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_NETTLE_PBKDF2_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_NETTLE_RIPEMD160_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_NETTLE_SHA_H */
+
+/* Define to 1 if you have the `nl_langinfo' function. */
+/* #undef HAVE_NL_LANGINFO */
+
+/* Define to 1 if you have the `openat' function. */
+#define HAVE_OPENAT 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_OPENSSL_EVP_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_PATHS_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_PCREPOSIX_H */
+
+/* Define to 1 if you have the `pipe' function. */
+#define HAVE_PIPE 1
+
+/* Define to 1 if you have the `PKCS5_PBKDF2_HMAC_SHA1' function. */
+/* #undef HAVE_PKCS5_PBKDF2_HMAC_SHA1 */
+
+/* Define to 1 if you have the `poll' function. */
+#define HAVE_POLL 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_POLL_H 1
+
+/* Define to 1 if you have the `posix_spawnp' function. */
+#define HAVE_POSIX_SPAWNP 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_PROCESS_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_PTHREAD_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_PWD_H 1
+
+/* Define to 1 if you have the `readdir_r' function. */
+#define HAVE_READDIR_R 1
+
+/* Define to 1 if you have the `readlink' function. */
+#define HAVE_READLINK 1
+
+/* Define to 1 if you have the `readlinkat' function. */
+#define HAVE_READLINKAT 1
+
+/* Define to 1 if you have the `readpassphrase' function. */
+/* #undef HAVE_READPASSPHRASE */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_READPASSPHRASE_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_REGEX_H 1
+
+/* Define to 1 if you have the `select' function. */
+#define HAVE_SELECT 1
+
+/* Define to 1 if you have the `setenv' function. */
+#define HAVE_SETENV 1
+
+/* Define to 1 if you have the `setlocale' function. */
+#define HAVE_SETLOCALE 1
+
+/* Define to 1 if you have the `sigaction' function. */
+#define HAVE_SIGACTION 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SIGNAL_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SPAWN_H 1
+
+/* Define to 1 if you have the `statfs' function. */
+#define HAVE_STATFS 1
+
+/* Define to 1 if you have the `statvfs' function. */
+#define HAVE_STATVFS 1
+
+/* Define to 1 if `stat' has the bug that it succeeds when given the
+ zero-length file name argument. */
+/* #undef HAVE_STAT_EMPTY_STRING_BUG */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_STDARG_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the `strchr' function. */
+#define HAVE_STRCHR 1
+
+/* Define to 1 if you have the `strnlen' function. */
+#define HAVE_STRNLEN 1
+
+/* Define to 1 if you have the `strdup' function. */
+#define HAVE_STRDUP 1
+
+/* Define to 1 if you have the `strerror' function. */
+#define HAVE_STRERROR 1
+
+/* Define to 1 if you have the `strerror_r' function. */
+#define HAVE_STRERROR_R 1
+
+/* Define to 1 if you have the `strftime' function. */
+#define HAVE_STRFTIME 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the `strrchr' function. */
+#define HAVE_STRRCHR 1
+
+/* Define to 1 if `f_namemax' is a member of `struct statfs'. */
+/* #undef HAVE_STRUCT_STATFS_F_NAMEMAX */
+
+/* Define to 1 if `f_iosize' is a member of `struct statvfs'. */
+/* #undef HAVE_STRUCT_STATVFS_F_IOSIZE */
+
+/* Define to 1 if `st_birthtime' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_BIRTHTIME */
+
+/* Define to 1 if `st_birthtimespec.tv_nsec' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_BIRTHTIMESPEC_TV_NSEC */
+
+/* Define to 1 if `st_blksize' is a member of `struct stat'. */
+#define HAVE_STRUCT_STAT_ST_BLKSIZE 1
+
+/* Define to 1 if `st_flags' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_FLAGS */
+
+/* Define to 1 if `st_mtimespec.tv_nsec' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC */
+
+/* Define to 1 if `st_mtime_n' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_MTIME_N */
+
+/* Define to 1 if `st_mtime_usec' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_MTIME_USEC */
+
+/* Define to 1 if `st_mtim.tv_nsec' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC */
+
+/* Define to 1 if `st_umtime' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_UMTIME */
+
+/* Define to 1 if `tm_gmtoff' is a member of `struct tm'. */
+#define HAVE_STRUCT_TM_TM_GMTOFF 1
+
+/* Define to 1 if `__tm_gmtoff' is a member of `struct tm'. */
+/* #undef HAVE_STRUCT_TM___TM_GMTOFF */
+
+/* Define to 1 if you have `struct vfsconf'. */
+/* #undef HAVE_STRUCT_VFSCONF */
+
+/* Define to 1 if you have `struct xvfsconf'. */
+/* #undef HAVE_STRUCT_XVFSCONF */
+
+/* Define to 1 if you have the `symlink' function. */
+#define HAVE_SYMLINK 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_ACL_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_CDEFS_H */
+
+/* Define to 1 if you have the header file, and it defines `DIR'.
+ */
+/* #undef HAVE_SYS_DIR_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_EA_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_EXTATTR_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_IOCTL_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_MKDEV_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_MOUNT_H */
+
+/* Define to 1 if you have the header file, and it defines `DIR'.
+ */
+/* #undef HAVE_SYS_NDIR_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_PARAM_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_POLL_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_QUEUE_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_RICHACL_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_SELECT_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_STATFS_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_STATVFS_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_STAT_H 1
+
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_SYSMACROS_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_UTIME_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_UTSNAME_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_VFS_H 1
+
+/* Define to 1 if you have <sys/wait.h> that is POSIX.1 compatible. */
+#define HAVE_SYS_WAIT_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_XATTR_H */
+
+/* Define to 1 if you have the `timegm' function. */
+#define HAVE_TIMEGM 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_TIME_H 1
+
+/* Define to 1 if you have the `tzset' function. */
+#define HAVE_TZSET 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the `unlinkat' function. */
+#define HAVE_UNLINKAT 1
+
+/* Define to 1 if you have the `unsetenv' function. */
+#define HAVE_UNSETENV 1
+
+/* Define to 1 if the system has the type `unsigned long long'. */
+/* #undef HAVE_UNSIGNED_LONG_LONG */
+
+/* Define to 1 if the system has the type `unsigned long long int'. */
+/* #undef HAVE_UNSIGNED_LONG_LONG_INT */
+
+/* Define to 1 if you have the `utime' function. */
+#define HAVE_UTIME 1
+
+/* Define to 1 if you have the `utimensat' function. */
+#define HAVE_UTIMENSAT 1
+
+/* Define to 1 if you have the `utimes' function. */
+#define HAVE_UTIMES 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_UTIME_H 1
+
+/* Define to 1 if you have the `vfork' function. */
+#define HAVE_VFORK 1
+
+/* Define to 1 if you have the `vprintf' function. */
+#define HAVE_VPRINTF 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_WCHAR_H 1
+
+/* Define to 1 if the system has the type `wchar_t'. */
+#define HAVE_WCHAR_T 1
+
+/* Define to 1 if you have the `wcrtomb' function. */
+#define HAVE_WCRTOMB 1
+
+/* Define to 1 if you have the `wcscmp' function. */
+#define HAVE_WCSCMP 1
+
+/* Define to 1 if you have the `wcscpy' function. */
+#define HAVE_WCSCPY 1
+
+/* Define to 1 if you have the `wcslen' function. */
+#define HAVE_WCSLEN 1
+
+/* Define to 1 if you have the `wctomb' function. */
+#define HAVE_WCTOMB 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_WCTYPE_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_WINCRYPT_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_WINDOWS_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_WINIOCTL_H */
+
+/* Define to 1 if you have _CrtSetReportMode in <crtdbg.h> */
+/* #undef HAVE__CrtSetReportMode */
+
+/* Define to 1 if you have the `wmemcmp' function. */
+#define HAVE_WMEMCMP 1
+
+/* Define to 1 if you have the `wmemcpy' function. */
+#define HAVE_WMEMCPY 1
+
+/* Define to 1 if you have the `wmemmove' function. */
+#define HAVE_WMEMMOVE 1
+
+/* Define to 1 if you have a working EXT2_IOC_GETFLAGS */
+/* #undef HAVE_WORKING_EXT2_IOC_GETFLAGS */
+
+/* Define to 1 if you have a working FS_IOC_GETFLAGS */
+#define HAVE_WORKING_FS_IOC_GETFLAGS 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_ZLIB_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_ZSTD_H */
+
+/* Define to 1 if you have the `ctime_s' function. */
+/* #undef HAVE_CTIME_S */
+
+/* Define to 1 if you have the `_fseeki64' function. */
+/* #undef HAVE__FSEEKI64 */
+
+/* Define to 1 if you have the `_get_timezone' function. */
+/* #undef HAVE__GET_TIMEZONE */
+
+/* Define to 1 if you have the `gmtime_s' function. */
+/* #undef HAVE_GMTIME_S */
+
+/* Define to 1 if you have the `localtime_s' function. */
+/* #undef HAVE_LOCALTIME_S */
+
+/* Define to 1 if you have the `_mkgmtime' function. */
+/* #undef HAVE__MKGMTIME */
+
+/* Define as const if the declaration of iconv() needs const. */
+#define ICONV_CONST
+
+/* Version number of libarchive as a single integer */
+#define LIBARCHIVE_VERSION_NUMBER "3007000"
+
+/* Version number of libarchive */
+#define LIBARCHIVE_VERSION_STRING "3.7.0"
+
+/* Define to 1 if `lstat' dereferences a symlink specified with a trailing
+ slash. */
+/* #undef LSTAT_FOLLOWS_SLASHED_SYMLINK */
+
+/* Define to 1 if `major', `minor', and `makedev' are declared in <mkdev.h>.
+ */
+/* #undef MAJOR_IN_MKDEV */
+
+/* Define to 1 if `major', `minor', and `makedev' are declared in
+ <sysmacros.h>. */
+/* #undef MAJOR_IN_SYSMACROS */
+
+/* Define to 1 if your C compiler doesn't accept -c and -o together. */
+/* #undef NO_MINUS_C_MINUS_O */
+
+/* The size of `wchar_t', as computed by sizeof. */
+#define SIZEOF_WCHAR_T 4
+
+/* Define to 1 if strerror_r returns char *. */
+/* #undef STRERROR_R_CHAR_P */
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+/* #undef TIME_WITH_SYS_TIME */
+
+/*
+ * Some platforms require a macro to use extension functions.
+ */
+#define SAFE_TO_DEFINE_EXTENSIONS 1
+#ifdef SAFE_TO_DEFINE_EXTENSIONS
+/* Enable extensions on AIX 3, Interix. */
+#ifndef _ALL_SOURCE
+# define _ALL_SOURCE 1
+#endif
+/* Enable GNU extensions on systems that have them. */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+/* Enable threading extensions on Solaris. */
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# define _POSIX_PTHREAD_SEMANTICS 1
+#endif
+/* Enable extensions on HP NonStop. */
+#ifndef _TANDEM_SOURCE
+# define _TANDEM_SOURCE 1
+#endif
+/* Enable general extensions on Solaris. */
+#ifndef __EXTENSIONS__
+# define __EXTENSIONS__ 1
+#endif
+#endif /* SAFE_TO_DEFINE_EXTENSIONS */
+
+/* Version number of package */
+#define VERSION "3.7.0"
+
+/* Number of bits in a file offset, on hosts where this is settable. */
+/* #undef _FILE_OFFSET_BITS */
+
+/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */
+/* #undef _LARGEFILE_SOURCE */
+
+/* Define for large files, on AIX-style hosts. */
+/* #undef _LARGE_FILES */
+
+/* Define to control Windows SDK version */
+#ifndef NTDDI_VERSION
+/* #undef NTDDI_VERSION */
+#endif // NTDDI_VERSION
+
+#ifndef _WIN32_WINNT
+/* #undef _WIN32_WINNT */
+#endif // _WIN32_WINNT
+
+#ifndef WINVER
+/* #undef WINVER */
+#endif // WINVER
+
+/* Define to empty if `const' does not conform to ANSI C. */
+/* #undef const */
+
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef gid_t */
+
+/* Define to `unsigned long' if <sys/types.h> does not define. */
+/* #undef id_t */
+
+/* Define to `int' if <sys/types.h> does not define. */
+/* #undef mode_t */
+
+/* Define to `long long' if <sys/types.h> does not define. */
+/* #undef off_t */
+
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef pid_t */
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+/* #undef size_t */
+
+/* Define to `int' if <sys/types.h> does not define. */
+/* #undef ssize_t */
+
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef uid_t */
+
+/* Define to `int' if <stdint.h> does not define. */
+/* #undef intptr_t */
+
+/* Define to `unsigned int' if <stdint.h> does not define. */
+/* #undef uintptr_t */
diff --git a/contrib/libmetrohash/src/platform.h b/contrib/libmetrohash/src/platform.h
index bc00e5a286b..9e83d11cb7c 100644
--- a/contrib/libmetrohash/src/platform.h
+++ b/contrib/libmetrohash/src/platform.h
@@ -17,7 +17,8 @@
#ifndef METROHASH_PLATFORM_H
#define METROHASH_PLATFORM_H
-#include <stdint.h>
+#include <bit>
+#include <cstdint>
#include <string.h>
// rotate right idiom recognized by most compilers
@@ -33,6 +34,11 @@ inline static uint64_t read_u64(const void * const ptr)
// so we use memcpy() which is the most portable. clang & gcc usually translates `memcpy()` into a single `load` instruction
// when hardware supports it, so using memcpy() is efficient too.
memcpy(&result, ptr, sizeof(result));
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ result = std::byteswap(result);
+#endif
+
return result;
}
@@ -40,6 +46,11 @@ inline static uint64_t read_u32(const void * const ptr)
{
uint32_t result;
memcpy(&result, ptr, sizeof(result));
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ result = std::byteswap(result);
+#endif
+
return result;
}
@@ -47,6 +58,11 @@ inline static uint64_t read_u16(const void * const ptr)
{
uint16_t result;
memcpy(&result, ptr, sizeof(result));
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ result = std::byteswap(result);
+#endif
+
return result;
}
diff --git a/contrib/nlp-data-cmake/CMakeLists.txt b/contrib/nlp-data-cmake/CMakeLists.txt
deleted file mode 100644
index 5380269c479..00000000000
--- a/contrib/nlp-data-cmake/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
-
-set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data")
-
-add_library (_nlp_data INTERFACE)
-
-clickhouse_embed_binaries(
- TARGET nlp_dictionaries
- RESOURCE_DIR "${LIBRARY_DIR}"
- RESOURCES charset.zst tonality_ru.zst programming.zst
-)
-
-add_dependencies(_nlp_data nlp_dictionaries)
-target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_OBJECTS:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}")
-add_library(ch_contrib::nlp_data ALIAS _nlp_data)
diff --git a/contrib/qpl b/contrib/qpl
index 3f8f5cea277..faaf1935045 160000
--- a/contrib/qpl
+++ b/contrib/qpl
@@ -1 +1 @@
-Subproject commit 3f8f5cea27739f5261e8fd577dc233ffe88bf679
+Subproject commit faaf19350459c076e66bb5df11743c3fade59b73
diff --git a/docker/README.md b/docker/README.md
index ec52ddd143e..c1bb3b49f00 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -1,5 +1,5 @@
## ClickHouse Dockerfiles
-This directory contain Dockerfiles for `clickhouse-client` and `clickhouse-server`. They are updated in each release.
+This directory contains Dockerfiles for `clickhouse-server`. They are updated in each release.
-Also there is bunch of images for testing and CI. They are listed in `images.json` file and updated on each commit to master. If you need to add another image, place information about it into `images.json`.
+Also, there is a bunch of images for testing and CI. They are listed in the `images.json` file and updated on each commit to master. If you need to add another image, place information about it into `images.json`.
diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile
deleted file mode 100644
index 1c185daec75..00000000000
--- a/docker/client/Dockerfile
+++ /dev/null
@@ -1,34 +0,0 @@
-FROM ubuntu:18.04
-
-# ARG for quick switch to a given ubuntu mirror
-ARG apt_archive="http://archive.ubuntu.com"
-RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
-
-ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
-ARG version=22.1.1.*
-
-RUN apt-get update \
- && apt-get install --yes --no-install-recommends \
- apt-transport-https \
- ca-certificates \
- dirmngr \
- gnupg \
- && mkdir -p /etc/apt/sources.list.d \
- && apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \
- && echo $repository > /etc/apt/sources.list.d/clickhouse.list \
- && apt-get update \
- && env DEBIAN_FRONTEND=noninteractive \
- apt-get install --allow-unauthenticated --yes --no-install-recommends \
- clickhouse-client=$version \
- clickhouse-common-static=$version \
- locales \
- tzdata \
- && rm -rf /var/lib/apt/lists/* /var/cache/debconf \
- && apt-get clean
-
-RUN locale-gen en_US.UTF-8
-ENV LANG en_US.UTF-8
-ENV LANGUAGE en_US:en
-ENV LC_ALL en_US.UTF-8
-
-ENTRYPOINT ["/usr/bin/clickhouse-client"]
diff --git a/docker/client/README.md b/docker/client/README.md
deleted file mode 100644
index bbcc7d60794..00000000000
--- a/docker/client/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# ClickHouse Client Docker Image
-
-For more information see [ClickHouse Server Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/).
-
-## License
-
-View [license information](https://github.com/ClickHouse/ClickHouse/blob/master/LICENSE) for the software contained in this image.
diff --git a/docker/images.json b/docker/images.json
index e8fc329a640..d895e2da2f0 100644
--- a/docker/images.json
+++ b/docker/images.json
@@ -125,6 +125,7 @@
"docker/test/keeper-jepsen",
"docker/test/server-jepsen",
"docker/test/sqllogic",
+ "docker/test/sqltest",
"docker/test/stateless"
]
},
@@ -155,11 +156,18 @@
},
"docker/docs/builder": {
"name": "clickhouse/docs-builder",
- "dependent": [
- ]
+ "dependent": []
},
"docker/test/sqllogic": {
"name": "clickhouse/sqllogic-test",
"dependent": []
+ },
+ "docker/test/sqltest": {
+ "name": "clickhouse/sqltest",
+ "dependent": []
+ },
+ "docker/test/integration/nginx_dav": {
+ "name": "clickhouse/nginx-dav",
+ "dependent": []
}
}
diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 8a6324aef88..c7206550bd8 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
esac
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
-ARG VERSION="23.6.2.18"
+ARG VERSION="23.7.4.5"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.
diff --git a/docker/packager/README.md b/docker/packager/README.md
index a78feb8d7fc..3a91f9a63f0 100644
--- a/docker/packager/README.md
+++ b/docker/packager/README.md
@@ -6,7 +6,7 @@ Usage:
Build deb package with `clang-14` in `debug` mode:
```
$ mkdir deb/test_output
-$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --build-type=debug
+$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --debug-build
$ ls -l deb/test_output
-rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb
-rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb
diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile
index 897bcd24d04..940daad9c61 100644
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@@ -80,6 +80,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
# Download toolchain and SDK for Darwin
RUN curl -sL -O https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz
+# Download and install mold 2.0 for s390x build
+RUN curl -Lo /tmp/mold.tar.gz "https://github.com/rui314/mold/releases/download/v2.0.0/mold-2.0.0-x86_64-linux.tar.gz" \
+ && mkdir /tmp/mold \
+ && tar -xzf /tmp/mold.tar.gz -C /tmp/mold \
+ && cp -r /tmp/mold/mold*/* /usr \
+ && rm -rf /tmp/mold \
+ && rm /tmp/mold.tar.gz
+
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
ARG NFPM_VERSION=2.20.0
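
A quick check that the linker lands where the s390x toolchain expects it (a sketch to run inside the built image; the expected path follows from the `cp -r ... /usr` step above):

```
mold --version           # should report mold 2.0.0
command -v mold          # expected: /usr/bin/mold
```
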
diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh
index c0803c74147..38b2ef75d24 100755
--- a/docker/packager/binary/build.sh
+++ b/docker/packager/binary/build.sh
@@ -64,7 +64,7 @@ then
ninja $NINJA_FLAGS clickhouse-keeper
ls -la ./programs/
- ldd ./programs/clickhouse-keeper
+ ldd ./programs/clickhouse-keeper ||:
if [ -n "$MAKE_DEB" ]; then
# No quotes because I want it to expand to nothing if empty.
@@ -80,19 +80,9 @@ else
cmake --debug-trycompile -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
fi
-if [ "coverity" == "$COMBINED_OUTPUT" ]
-then
- mkdir -p /workdir/cov-analysis
-
- wget --post-data "token=$COVERITY_TOKEN&project=ClickHouse%2FClickHouse" -qO- https://scan.coverity.com/download/linux64 | tar xz -C /workdir/cov-analysis --strip-components 1
- export PATH=$PATH:/workdir/cov-analysis/bin
- cov-configure --config ./coverity.config --template --comptype clangcc --compiler "$CC"
- SCAN_WRAPPER="cov-build --config ./coverity.config --dir cov-int"
-fi
-
# No quotes because I want it to expand to nothing if empty.
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
-$SCAN_WRAPPER ninja $NINJA_FLAGS $BUILD_TARGET
+ninja $NINJA_FLAGS $BUILD_TARGET
ls -la ./programs
@@ -175,13 +165,6 @@ then
mv "$COMBINED_OUTPUT.tar.zst" /output
fi
-if [ "coverity" == "$COMBINED_OUTPUT" ]
-then
- # Coverity does not understand ZSTD.
- tar -cvz -f "coverity-scan.tar.gz" cov-int
- mv "coverity-scan.tar.gz" /output
-fi
-
ccache_status
ccache --evict-older-than 1d
diff --git a/docker/packager/packager b/docker/packager/packager
index e12bd55dde3..1d0ccae8a69 100755
--- a/docker/packager/packager
+++ b/docker/packager/packager
@@ -22,7 +22,7 @@ def check_image_exists_locally(image_name: str) -> bool:
output = subprocess.check_output(
f"docker images -q {image_name} 2> /dev/null", shell=True
)
- return output != ""
+ return output != b""
except subprocess.CalledProcessError:
return False
@@ -46,7 +46,7 @@ def build_image(image_name: str, filepath: Path) -> None:
)
-def pre_build(repo_path: Path, env_variables: List[str]):
+def pre_build(repo_path: Path, env_variables: List[str]) -> None:
if "WITH_PERFORMANCE=1" in env_variables:
current_branch = subprocess.check_output(
"git branch --show-current", shell=True, encoding="utf-8"
@@ -80,9 +80,12 @@ def run_docker_image_with_env(
output_dir: Path,
env_variables: List[str],
ch_root: Path,
+ cargo_cache_dir: Path,
ccache_dir: Optional[Path],
-):
+) -> None:
output_dir.mkdir(parents=True, exist_ok=True)
+ cargo_cache_dir.mkdir(parents=True, exist_ok=True)
+
env_part = " -e ".join(env_variables)
if env_part:
env_part = " -e " + env_part
@@ -104,7 +107,7 @@ def run_docker_image_with_env(
cmd = (
f"docker run --network=host --user={user} --rm {ccache_mount}"
f"--volume={output_dir}:/output --volume={ch_root}:/build {env_part} "
- f"{interactive} {image_name}"
+ f"--volume={cargo_cache_dir}:/rust/cargo/registry {interactive} {image_name}"
)
logging.info("Will build ClickHouse pkg with cmd: '%s'", cmd)
@@ -112,12 +115,12 @@ def run_docker_image_with_env(
subprocess.check_call(cmd, shell=True)
-def is_release_build(build_type: str, package_type: str, sanitizer: str) -> bool:
- return build_type == "" and package_type == "deb" and sanitizer == ""
+def is_release_build(debug_build: bool, package_type: str, sanitizer: str) -> bool:
+ return not debug_build and package_type == "deb" and sanitizer == ""
def parse_env_variables(
- build_type: str,
+ debug_build: bool,
compiler: str,
sanitizer: str,
package_type: str,
@@ -129,9 +132,10 @@ def parse_env_variables(
version: str,
official: bool,
additional_pkgs: bool,
+ with_profiler: bool,
with_coverage: bool,
with_binaries: str,
-):
+) -> List[str]:
DARWIN_SUFFIX = "-darwin"
DARWIN_ARM_SUFFIX = "-darwin-aarch64"
ARM_SUFFIX = "-aarch64"
@@ -139,6 +143,7 @@ def parse_env_variables(
FREEBSD_SUFFIX = "-freebsd"
PPC_SUFFIX = "-ppc64le"
RISCV_SUFFIX = "-riscv64"
+ S390X_SUFFIX = "-s390x"
AMD64_COMPAT_SUFFIX = "-amd64-compat"
result = []
@@ -152,6 +157,7 @@ def parse_env_variables(
is_cross_arm_v80compat = compiler.endswith(ARM_V80COMPAT_SUFFIX)
is_cross_ppc = compiler.endswith(PPC_SUFFIX)
is_cross_riscv = compiler.endswith(RISCV_SUFFIX)
+ is_cross_s390x = compiler.endswith(S390X_SUFFIX)
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX)
@@ -213,6 +219,11 @@ def parse_env_variables(
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake"
)
+ elif is_cross_s390x:
+ cc = compiler[: -len(S390X_SUFFIX)]
+ cmake_flags.append(
+ "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-s390x.cmake"
+ )
elif is_amd64_compat:
cc = compiler[: -len(AMD64_COMPAT_SUFFIX)]
result.append("DEB_ARCH=amd64")
@@ -240,7 +251,7 @@ def parse_env_variables(
build_target = (
f"{build_target} clickhouse-odbc-bridge clickhouse-library-bridge"
)
- if is_release_build(build_type, package_type, sanitizer):
+ if is_release_build(debug_build, package_type, sanitizer):
cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON")
result.append("WITH_PERFORMANCE=1")
if is_cross_arm:
@@ -253,15 +264,10 @@ def parse_env_variables(
cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}")
cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}")
- # Create combined output archive for performance tests.
- if package_type == "coverity":
- result.append("COMBINED_OUTPUT=coverity")
- result.append('COVERITY_TOKEN="$COVERITY_TOKEN"')
-
if sanitizer:
result.append(f"SANITIZER={sanitizer}")
- if build_type:
- result.append(f"BUILD_TYPE={build_type.capitalize()}")
+ if debug_build:
+ result.append("BUILD_TYPE=Debug")
else:
result.append("BUILD_TYPE=None")
@@ -327,6 +333,9 @@ def parse_env_variables(
# utils are not included into clickhouse-bundle, so build everything
build_target = "all"
+ if with_profiler:
+ cmake_flags.append("-DENABLE_BUILD_PROFILING=1")
+
if with_coverage:
cmake_flags.append("-DWITH_COVERAGE=1")
@@ -356,7 +365,7 @@ def parse_args() -> argparse.Namespace:
)
parser.add_argument(
"--package-type",
- choices=["deb", "binary", "coverity"],
+ choices=["deb", "binary"],
required=True,
)
parser.add_argument(
@@ -366,7 +375,7 @@ def parse_args() -> argparse.Namespace:
help="ClickHouse git repository",
)
parser.add_argument("--output-dir", type=dir_name, required=True)
- parser.add_argument("--build-type", choices=("debug", ""), default="")
+ parser.add_argument("--debug-build", action="store_true")
parser.add_argument(
"--compiler",
@@ -378,6 +387,7 @@ def parse_args() -> argparse.Namespace:
"clang-16-aarch64-v80compat",
"clang-16-ppc64le",
"clang-16-riscv64",
+ "clang-16-s390x",
"clang-16-amd64-compat",
"clang-16-freebsd",
),
@@ -417,10 +427,18 @@ def parse_args() -> argparse.Namespace:
action="store_true",
help="if set, the build fails on errors writing cache to S3",
)
+ parser.add_argument(
+ "--cargo-cache-dir",
+ default=Path(os.getenv("CARGO_HOME", "") or Path.home() / ".cargo")
+ / "registry",
+ type=dir_name,
+ help="a directory to preserve the rust cargo crates",
+ )
parser.add_argument("--force-build-image", action="store_true")
parser.add_argument("--version")
parser.add_argument("--official", action="store_true")
parser.add_argument("--additional-pkgs", action="store_true")
+ parser.add_argument("--with-profiler", action="store_true")
parser.add_argument("--with-coverage", action="store_true")
parser.add_argument(
"--with-binaries", choices=("programs", "tests", ""), default=""
@@ -456,7 +474,7 @@ def parse_args() -> argparse.Namespace:
return args
-def main():
+def main() -> None:
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
args = parse_args()
@@ -472,7 +490,7 @@ def main():
build_image(image_with_version, dockerfile)
env_prepared = parse_env_variables(
- args.build_type,
+ args.debug_build,
args.compiler,
args.sanitizer,
args.package_type,
@@ -484,6 +502,7 @@ def main():
args.version,
args.official,
args.additional_pkgs,
+ args.with_profiler,
args.with_coverage,
args.with_binaries,
)
@@ -495,6 +514,7 @@ def main():
args.output_dir,
env_prepared,
ch_root,
+ args.cargo_cache_dir,
args.ccache_dir,
)
logging.info("Output placed into %s", args.output_dir)
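
Putting the new options together, a hypothetical invocation of the updated packager (the output directory and cache path are illustrative; only the flags themselves come from the changes above):

```
# Cross-compile a binary package for s390x as a debug build,
# reusing the local cargo registry between runs.
mkdir -p s390x_output
./docker/packager/packager \
    --output-dir s390x_output \
    --package-type binary \
    --compiler clang-16-s390x \
    --debug-build \
    --cargo-cache-dir "$HOME/.cargo/registry"
```
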
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 7f453627601..a3a1f912e90 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.6.2.18"
+ARG VERSION="23.7.4.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index 1fa7b83ae16..4936cfeccb0 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.6.2.18"
+ARG VERSION="23.7.4.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image
diff --git a/docker/server/README.md b/docker/server/README.md
index 67646a262f5..6200acbd30c 100644
--- a/docker/server/README.md
+++ b/docker/server/README.md
@@ -97,8 +97,8 @@ docker run -d \
You may also want to mount:
-* `/etc/clickhouse-server/config.d/*.xml` - files with server configuration adjustmenets
-* `/etc/clickhouse-server/users.d/*.xml` - files with user settings adjustmenets
+* `/etc/clickhouse-server/config.d/*.xml` - files with server configuration adjustments
+* `/etc/clickhouse-server/users.d/*.xml` - files with user settings adjustments
* `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below).
### Linux capabilities
diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile
index f6836804454..b55baa0e0fc 100644
--- a/docker/test/base/Dockerfile
+++ b/docker/test/base/Dockerfile
@@ -11,6 +11,7 @@ RUN apt-get update \
pv \
ripgrep \
zstd \
+ locales \
--yes --no-install-recommends
# Sanitizer options for services (clickhouse-server)
@@ -18,17 +19,23 @@ RUN apt-get update \
# and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB).
# TSAN will flush shadow memory when reaching this limit.
# It may cause false-negatives, but it's better than OOM.
-RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment
+RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment
RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment
RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
# (but w/o verbosity for TSAN, otherwise test.reference will not match)
-ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
+ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
-ENV TZ=Europe/Moscow
+RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
+ENV LC_ALL en_US.UTF-8
+
+ENV TZ=Europe/Amsterdam
RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
+# This script is used to set up real-time export of server logs from the CI into an external ClickHouse cluster:
+COPY setup_export_logs.sh /
+
CMD sleep 1
diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh
new file mode 100755
index 00000000000..d68fd392d62
--- /dev/null
+++ b/docker/test/base/setup_export_logs.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# This script sets up export of system log tables to a remote server.
+# Remote tables are created if they do not exist and are augmented with extra columns,
+# and their names will contain a hash of the table structure,
+# which allows exporting tables from servers of different versions.
+
+# Pre-configured destination cluster to which the data is exported
+CLUSTER=${CLUSTER:=system_logs_export}
+
+EXTRA_COLUMNS=${EXTRA_COLUMNS:="pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), "}
+EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:="0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type"}
+EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:="check_name, "}
+
+CONNECTION_PARAMETERS=${CONNECTION_PARAMETERS:=""}
+
+# Create all configured system logs:
+clickhouse-client --query "SYSTEM FLUSH LOGS"
+
+# It doesn't make sense to try creating tables if SYNC fails
+echo "SYSTEM SYNC DATABASE REPLICA default" | clickhouse-client --receive_timeout 180 $CONNECTION_PARAMETERS || exit 0
+
+# For each system log table:
+clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
+do
+ # Calculate hash of its structure:
+ hash=$(clickhouse-client --query "
+ SELECT sipHash64(groupArray((name, type)))
+ FROM (SELECT name, type FROM system.columns
+ WHERE database = 'system' AND table = '$table'
+ ORDER BY position)
+ ")
+
+ # Create the destination table with adapted name and structure:
+ statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e '
+ s/^\($/('"$EXTRA_COLUMNS"'/;
+ s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/;
+ s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/;
+ /^TTL /d
+ ')
+
+ echo "Creating destination table ${table}_${hash}" >&2
+
+ echo "$statement" | clickhouse-client --distributed_ddl_task_timeout=10 $CONNECTION_PARAMETERS || continue
+
+ echo "Creating table system.${table}_sender" >&2
+
+ # Create a Distributed table and a materialized view that watches the original table:
+ clickhouse-client --query "
+ CREATE TABLE system.${table}_sender
+ ENGINE = Distributed(${CLUSTER}, default, ${table}_${hash})
+ SETTINGS flush_on_detach=0
+ EMPTY AS
+ SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
+ FROM system.${table}
+ "
+
+ echo "Creating materialized view system.${table}_watcher" >&2
+
+ clickhouse-client --query "
+ CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS
+ SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
+ FROM system.${table}
+ "
+done
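
To make the moving parts of this script concrete, here is a hedged usage sketch for a single table, `system.query_log`; the environment values mirror what the CI scripts pass in, and the hash suffix in the comments is purely illustrative:

```
# Environment expected by the script (values come from the CI run):
export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, now() AS check_start_time, 'Fuzzer' AS check_name, '$INSTANCE_TYPE' AS instance_type"
export CONNECTION_PARAMETERS="--secure --user ci --host $CLICKHOUSE_CI_LOGS_HOST --password $CLICKHOUSE_CI_LOGS_PASSWORD"

/setup_export_logs.sh

# For system.query_log the script ends up creating (hash suffix illustrative):
#   query_log_10123456789012345678   remote table, name suffixed with the structure hash
#   system.query_log_sender          local Distributed table pointing at the remote table
#   system.query_log_watcher         materialized view forwarding new rows into the sender
```
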
diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile
index da4baa8c687..ad24e662a6c 100644
--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@@ -32,7 +32,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
&& rm -rf /tmp/clickhouse-odbc-tmp
-ENV TZ=Europe/Moscow
+ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
ENV COMMIT_SHA=''
diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 828c73e6781..477bbdb6919 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -141,13 +141,14 @@ function clone_submodules
contrib/jemalloc
contrib/replxx
contrib/wyhash
- contrib/hashidsxx
contrib/c-ares
contrib/morton-nd
contrib/xxHash
contrib/simdjson
contrib/liburing
contrib/libfiu
+ contrib/incbin
+ contrib/yaml-cpp
)
git submodule sync
@@ -170,6 +171,7 @@ function run_cmake
"-DENABLE_SIMDJSON=1"
"-DENABLE_JEMALLOC=1"
"-DENABLE_LIBURING=1"
+ "-DENABLE_YAML_CPP=1"
)
export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"
diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile
index aa71074c02a..0bc0fb06633 100644
--- a/docker/test/fuzzer/Dockerfile
+++ b/docker/test/fuzzer/Dockerfile
@@ -8,7 +8,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV LANG=C.UTF-8
-ENV TZ=Europe/Moscow
+ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN apt-get update \
diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh
index 5cda0831a84..05cc92ee040 100755
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@@ -122,6 +122,23 @@ EOL
$PWD
EOL
+
+ # Set up a cluster for exporting logs to ClickHouse Cloud
+ # Note: these variables are provided to the Docker run command by the Python script in tests/ci
+ if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
+ then
+ echo "
+remote_servers:
+ system_logs_export:
+ shard:
+ replica:
+ secure: 1
+ user: ci
+ host: '${CLICKHOUSE_CI_LOGS_HOST}'
+ port: 9440
+ password: '${CLICKHOUSE_CI_LOGS_PASSWORD}'
+" > db/config.d/system_logs_export.yaml
+ fi
}
function filter_exists_and_template
@@ -223,7 +240,22 @@ quit
done
clickhouse-client --query "select 1" # This checks that the server is responding
kill -0 $server_pid # This checks that it is our server that is started and not some other one
- echo Server started and responded
+ echo 'Server started and responded'
+
+ # Initialize export of system logs to ClickHouse Cloud
+ if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
+ then
+ export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type"
+ # TODO: Check if the password will appear in the logs.
+ export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}"
+
+ /setup_export_logs.sh
+
+ # Unset variables after use
+ export CONNECTION_PARAMETERS=''
+ export CLICKHOUSE_CI_LOGS_HOST=''
+ export CLICKHOUSE_CI_LOGS_PASSWORD=''
+ fi
# SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric.
# SC2046: Quote this to prevent word splitting. Actually I need word splitting.
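
A quick way to sanity-check the cluster definition written above from inside the container is to query `system.clusters` (a sketch; it assumes the server has loaded the generated YAML):

```
# The YAML above defines the cluster that /setup_export_logs.sh uses by default
# (CLUSTER=system_logs_export); it should resolve to a single secure replica.
clickhouse-client --query "
    SELECT cluster, host_name, port
    FROM system.clusters
    WHERE cluster = 'system_logs_export'"
```
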
diff --git a/docker/test/install/deb/Dockerfile b/docker/test/install/deb/Dockerfile
index 9614473c69b..e9c928b1fe7 100644
--- a/docker/test/install/deb/Dockerfile
+++ b/docker/test/install/deb/Dockerfile
@@ -12,6 +12,7 @@ ENV \
# install systemd packages
RUN apt-get update && \
apt-get install -y --no-install-recommends \
+ sudo \
systemd \
&& \
apt-get clean && \
diff --git a/docker/test/integration/nginx_dav/Dockerfile b/docker/test/integration/nginx_dav/Dockerfile
new file mode 100644
index 00000000000..42c1244f6dc
--- /dev/null
+++ b/docker/test/integration/nginx_dav/Dockerfile
@@ -0,0 +1,6 @@
+FROM nginx:alpine-slim
+
+COPY default.conf /etc/nginx/conf.d/
+
+RUN mkdir /usr/share/nginx/files/ \
+ && chown nginx: /usr/share/nginx/files/ -R
diff --git a/docker/test/integration/nginx_dav/default.conf b/docker/test/integration/nginx_dav/default.conf
new file mode 100644
index 00000000000..466d0584a2d
--- /dev/null
+++ b/docker/test/integration/nginx_dav/default.conf
@@ -0,0 +1,25 @@
+server {
+ listen 80;
+
+ #root /usr/share/nginx/test.com;
+ index index.html index.htm;
+
+ server_name test.com localhost;
+
+ location / {
+ expires max;
+ root /usr/share/nginx/files;
+ client_max_body_size 20m;
+ client_body_temp_path /usr/share/nginx/tmp;
+ dav_methods PUT; # Allowed methods, only PUT is necessary
+
+ create_full_put_path on; # nginx automatically creates nested directories
+ dav_access user:rw group:r all:r; # access permissions for files
+
+ limit_except GET {
+ allow all;
+ }
+ }
+
+ error_page 405 =200 $uri;
+}
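
For reference, this is roughly how a test could exercise the WebDAV endpoint (a sketch; it assumes the container's port 80 is reachable as `localhost`, and the file name is arbitrary):

```
# Only PUT is enabled in dav_methods; GET serves the uploaded file back from /usr/share/nginx/files.
echo "hello" > /tmp/hello.txt
curl -sS -T /tmp/hello.txt http://localhost/hello.txt
curl -sS http://localhost/hello.txt
```
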
diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile
index d6c127c8421..f67c45f7114 100644
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@@ -95,9 +95,11 @@ RUN python3 -m pip install --no-cache-dir \
pytest-timeout \
pytest-xdist \
pytz \
+ pyyaml==5.3.1 \
redis \
requests-kerberos \
tzlocal==2.1 \
+ retry \
urllib3
# Hudi supports only spark 3.3.*, not 3.4
@@ -128,10 +130,11 @@ COPY misc/ /misc/
# Same options as in test/base/Dockerfile
# (in case you need to override them in tests)
-ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
+ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
EXPOSE 2375
ENTRYPOINT ["dockerd-entrypoint.sh"]
-CMD ["sh", "-c", "pytest $PYTEST_OPTS"]
+# To pass additional arguments (e.g. a list of tests), use PYTEST_ADDOPTS
+CMD ["sh", "-c", "pytest"]
diff --git a/docker/test/integration/runner/compose/docker_compose_coredns.yml b/docker/test/integration/runner/compose/docker_compose_coredns.yml
index b329d4e0a46..e4736e04846 100644
--- a/docker/test/integration/runner/compose/docker_compose_coredns.yml
+++ b/docker/test/integration/runner/compose/docker_compose_coredns.yml
@@ -2,7 +2,7 @@ version: "2.3"
services:
coredns:
- image: coredns/coredns:latest
+ image: coredns/coredns:1.9.3 # :latest broke this test
restart: always
volumes:
- ${COREDNS_CONFIG_DIR}/example.com:/example.com
diff --git a/docker/test/integration/runner/compose/docker_compose_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_hdfs.yml
index f83eb93fea7..1cae54ad9e1 100644
--- a/docker/test/integration/runner/compose/docker_compose_hdfs.yml
+++ b/docker/test/integration/runner/compose/docker_compose_hdfs.yml
@@ -12,3 +12,5 @@ services:
- type: ${HDFS_FS:-tmpfs}
source: ${HDFS_LOGS:-}
target: /usr/local/hadoop/logs
+ sysctls:
+ net.ipv4.ip_local_port_range: '55000 65535'
diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml
index 7e34f4c114d..4ae3de3cbc7 100644
--- a/docker/test/integration/runner/compose/docker_compose_kafka.yml
+++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml
@@ -4,6 +4,8 @@ services:
kafka_zookeeper:
image: zookeeper:3.4.9
hostname: kafka_zookeeper
+ ports:
+ - 2181:2181
environment:
ZOO_MY_ID: 1
ZOO_PORT: 2181
@@ -15,33 +17,59 @@ services:
image: confluentinc/cp-kafka:5.2.0
hostname: kafka1
ports:
- - ${KAFKA_EXTERNAL_PORT:-8081}:${KAFKA_EXTERNAL_PORT:-8081}
+ - ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT}
environment:
KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092
KAFKA_ADVERTISED_HOST_NAME: kafka1
- KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
KAFKA_BROKER_ID: 1
- KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
+ KAFKA_ZOOKEEPER_CONNECT: kafka_zookeeper:2181
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
depends_on:
- kafka_zookeeper
security_opt:
- label:disable
+ sysctls:
+ net.ipv4.ip_local_port_range: '55000 65535'
schema-registry:
image: confluentinc/cp-schema-registry:5.2.0
hostname: schema-registry
ports:
- - ${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313}
+ - ${SCHEMA_REGISTRY_EXTERNAL_PORT}:${SCHEMA_REGISTRY_EXTERNAL_PORT}
environment:
SCHEMA_REGISTRY_HOST_NAME: schema-registry
- SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
+ SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_EXTERNAL_PORT}
+ SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: noauth
depends_on:
- kafka_zookeeper
- kafka1
+ restart: always
+ security_opt:
+ - label:disable
+
+ schema-registry-auth:
+ image: confluentinc/cp-schema-registry:5.2.0
+ hostname: schema-registry-auth
+ ports:
+ - ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
+ environment:
+ SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth
+ SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
+ SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
+ SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC
+ SCHEMA_REGISTRY_AUTHENTICATION_ROLES: user
+ SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar
+ SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf"
+ SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: auth
+ volumes:
+ - ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets
+ depends_on:
+ - kafka_zookeeper
+ - kafka1
+ restart: always
security_opt:
- label:disable
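
The difference between the two registries can be illustrated with plain REST calls (a sketch; the ports come from the compose environment variables, and the credentials live in the mounted `schema_registry_jaas.conf`, so `user:password` below is a placeholder):

```
# Unauthenticated registry answers directly:
curl -sS "http://localhost:${SCHEMA_REGISTRY_EXTERNAL_PORT}/subjects"

# The BASIC-auth registry rejects anonymous requests...
curl -sS -o /dev/null -w '%{http_code}\n' "http://localhost:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}/subjects"   # 401

# ...and accepts the user defined in the mounted JAAS config:
curl -sS -u "user:password" "http://localhost:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}/subjects"
```
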
diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml
index 1160192696d..e955a14eb3d 100644
--- a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml
+++ b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml
@@ -20,6 +20,8 @@ services:
depends_on:
- hdfskerberos
entrypoint: /etc/bootstrap.sh -d
+ sysctls:
+ net.ipv4.ip_local_port_range: '55000 65535'
hdfskerberos:
image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest}
@@ -29,3 +31,5 @@ services:
- ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh
- /dev/urandom:/dev/random
expose: [88, 749]
+ sysctls:
+ net.ipv4.ip_local_port_range: '55000 65535'
diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml
index 86e920ff573..49d4c1db90f 100644
--- a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml
+++ b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml
@@ -48,6 +48,8 @@ services:
- kafka_kerberos
security_opt:
- label:disable
+ sysctls:
+ net.ipv4.ip_local_port_range: '55000 65535'
kafka_kerberos:
image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest}
diff --git a/docker/test/integration/runner/compose/docker_compose_meili.yml b/docker/test/integration/runner/compose/docker_compose_meili.yml
index c734c43b4c6..c1fad4aca87 100644
--- a/docker/test/integration/runner/compose/docker_compose_meili.yml
+++ b/docker/test/integration/runner/compose/docker_compose_meili.yml
@@ -1,16 +1,15 @@
version: '2.3'
services:
meili1:
- image: getmeili/meilisearch:v0.27.0
+ image: getmeili/meilisearch:v0.27.0
restart: always
ports:
- ${MEILI_EXTERNAL_PORT:-7700}:${MEILI_INTERNAL_PORT:-7700}
meili_secure:
- image: getmeili/meilisearch:v0.27.0
+ image: getmeili/meilisearch:v0.27.0
restart: always
ports:
- ${MEILI_SECURE_EXTERNAL_PORT:-7700}:${MEILI_SECURE_INTERNAL_PORT:-7700}
environment:
MEILI_MASTER_KEY: "password"
-
diff --git a/docker/test/integration/runner/compose/docker_compose_minio.yml b/docker/test/integration/runner/compose/docker_compose_minio.yml
index 3eaf891ff8e..f2979566296 100644
--- a/docker/test/integration/runner/compose/docker_compose_minio.yml
+++ b/docker/test/integration/runner/compose/docker_compose_minio.yml
@@ -14,7 +14,7 @@ services:
MINIO_ACCESS_KEY: minio
MINIO_SECRET_KEY: minio123
MINIO_PROMETHEUS_AUTH_TYPE: public
- command: server --address :9001 --certs-dir /certs /data1-1
+ command: server --console-address 127.0.0.1:19001 --address :9001 --certs-dir /certs /data1-1
depends_on:
- proxy1
- proxy2
diff --git a/docker/test/integration/runner/compose/docker_compose_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql.yml
index 6b98a372bd0..103fe2769e9 100644
--- a/docker/test/integration/runner/compose/docker_compose_mysql.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mysql.yml
@@ -9,10 +9,10 @@ services:
DATADIR: /mysql/
expose:
- ${MYSQL_PORT:-3306}
- command: --server_id=100
- --log-bin='mysql-bin-1.log'
- --default-time-zone='+3:00'
- --gtid-mode="ON"
+ command: --server_id=100
+ --log-bin='mysql-bin-1.log'
+ --default-time-zone='+3:00'
+ --gtid-mode="ON"
--enforce-gtid-consistency
--log-error-verbosity=3
--log-error=/mysql/error.log
@@ -21,4 +21,4 @@ services:
volumes:
- type: ${MYSQL_LOGS_FS:-tmpfs}
source: ${MYSQL_LOGS:-}
- target: /mysql/
\ No newline at end of file
+ target: /mysql/
diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml b/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml
index d5fb5a53aaf..9c9c7430cec 100644
--- a/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml
@@ -9,9 +9,9 @@ services:
DATADIR: /mysql/
expose:
- ${MYSQL8_PORT:-3306}
- command: --server_id=100 --log-bin='mysql-bin-1.log'
- --default_authentication_plugin='mysql_native_password'
- --default-time-zone='+3:00' --gtid-mode="ON"
+ command: --server_id=100 --log-bin='mysql-bin-1.log'
+ --default_authentication_plugin='mysql_native_password'
+ --default-time-zone='+3:00' --gtid-mode="ON"
--enforce-gtid-consistency
--log-error-verbosity=3
--log-error=/mysql/error.log
@@ -20,4 +20,4 @@ services:
volumes:
- type: ${MYSQL8_LOGS_FS:-tmpfs}
source: ${MYSQL8_LOGS:-}
- target: /mysql/
\ No newline at end of file
+ target: /mysql/
diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml
index 8e145a3b408..73f9e39f0d6 100644
--- a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml
@@ -9,10 +9,10 @@ services:
DATADIR: /mysql/
expose:
- ${MYSQL_CLUSTER_PORT:-3306}
- command: --server_id=100
- --log-bin='mysql-bin-2.log'
- --default-time-zone='+3:00'
- --gtid-mode="ON"
+ command: --server_id=100
+ --log-bin='mysql-bin-2.log'
+ --default-time-zone='+3:00'
+ --gtid-mode="ON"
--enforce-gtid-consistency
--log-error-verbosity=3
--log-error=/mysql/2_error.log
@@ -31,10 +31,10 @@ services:
DATADIR: /mysql/
expose:
- ${MYSQL_CLUSTER_PORT:-3306}
- command: --server_id=100
- --log-bin='mysql-bin-3.log'
- --default-time-zone='+3:00'
- --gtid-mode="ON"
+ command: --server_id=100
+ --log-bin='mysql-bin-3.log'
+ --default-time-zone='+3:00'
+ --gtid-mode="ON"
--enforce-gtid-consistency
--log-error-verbosity=3
--log-error=/mysql/3_error.log
@@ -53,10 +53,10 @@ services:
DATADIR: /mysql/
expose:
- ${MYSQL_CLUSTER_PORT:-3306}
- command: --server_id=100
- --log-bin='mysql-bin-4.log'
- --default-time-zone='+3:00'
- --gtid-mode="ON"
+ command: --server_id=100
+ --log-bin='mysql-bin-4.log'
+ --default-time-zone='+3:00'
+ --gtid-mode="ON"
--enforce-gtid-consistency
--log-error-verbosity=3
--log-error=/mysql/4_error.log
@@ -65,4 +65,4 @@ services:
volumes:
- type: ${MYSQL_CLUSTER_LOGS_FS:-tmpfs}
source: ${MYSQL_CLUSTER_LOGS:-}
- target: /mysql/
\ No newline at end of file
+ target: /mysql/
diff --git a/docker/test/integration/runner/compose/docker_compose_nginx.yml b/docker/test/integration/runner/compose/docker_compose_nginx.yml
index d0fb9fc1ff4..38d2a6d84c8 100644
--- a/docker/test/integration/runner/compose/docker_compose_nginx.yml
+++ b/docker/test/integration/runner/compose/docker_compose_nginx.yml
@@ -5,7 +5,7 @@ services:
# Files will be put into /usr/share/nginx/files.
nginx:
- image: kssenii/nginx-test:1.1
+ image: clickhouse/nginx-dav:${DOCKER_NGINX_DAV_TAG:-latest}
restart: always
ports:
- 80:80
diff --git a/docker/test/integration/runner/compose/docker_compose_postgres.yml b/docker/test/integration/runner/compose/docker_compose_postgres.yml
index 1fb6b7a1410..2ef7eb17395 100644
--- a/docker/test/integration/runner/compose/docker_compose_postgres.yml
+++ b/docker/test/integration/runner/compose/docker_compose_postgres.yml
@@ -12,9 +12,9 @@ services:
timeout: 5s
retries: 5
networks:
- default:
- aliases:
- - postgre-sql.local
+ default:
+ aliases:
+ - postgre-sql.local
environment:
POSTGRES_HOST_AUTH_METHOD: "trust"
POSTGRES_PASSWORD: mysecretpassword
diff --git a/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml b/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml
index 7a1c32e0023..b5dbae423b2 100644
--- a/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml
+++ b/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml
@@ -12,7 +12,7 @@ services:
command: ["zkServer.sh", "start-foreground"]
entrypoint: /zookeeper-ssl-entrypoint.sh
volumes:
- - type: bind
+ - type: bind
source: /misc/zookeeper-ssl-entrypoint.sh
target: /zookeeper-ssl-entrypoint.sh
- type: bind
@@ -37,7 +37,7 @@ services:
command: ["zkServer.sh", "start-foreground"]
entrypoint: /zookeeper-ssl-entrypoint.sh
volumes:
- - type: bind
+ - type: bind
source: /misc/zookeeper-ssl-entrypoint.sh
target: /zookeeper-ssl-entrypoint.sh
- type: bind
@@ -61,7 +61,7 @@ services:
command: ["zkServer.sh", "start-foreground"]
entrypoint: /zookeeper-ssl-entrypoint.sh
volumes:
- - type: bind
+ - type: bind
source: /misc/zookeeper-ssl-entrypoint.sh
target: /zookeeper-ssl-entrypoint.sh
- type: bind
diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh
index 3c4ff522b36..b05aef76faf 100755
--- a/docker/test/integration/runner/dockerd-entrypoint.sh
+++ b/docker/test/integration/runner/dockerd-entrypoint.sh
@@ -64,15 +64,16 @@ export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge
export CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH=/clickhouse-library-bridge
export DOCKER_BASE_TAG=${DOCKER_BASE_TAG:=latest}
-export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest}
-export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest}
export DOCKER_DOTNET_CLIENT_TAG=${DOCKER_DOTNET_CLIENT_TAG:=latest}
+export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest}
+export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest}
+export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest}
+export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest}
export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest}
export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest}
+export DOCKER_NGINX_DAV_TAG=${DOCKER_NGINX_DAV_TAG:=latest}
export DOCKER_POSTGRESQL_JAVA_CLIENT_TAG=${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:=latest}
-export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest}
-export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest}
cd /ClickHouse/tests/integration
exec "$@"
diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile
index ab9f1f8a2e3..d31663f9071 100644
--- a/docker/test/performance-comparison/Dockerfile
+++ b/docker/test/performance-comparison/Dockerfile
@@ -1,18 +1,7 @@
# docker build -t clickhouse/performance-comparison .
-# Using ubuntu:22.04 over 20.04 as all other images, since:
-# a) ubuntu 20.04 has too old parallel, and does not support --memsuspend
-# b) anyway for perf tests it should not be important (backward compatiblity
-# with older ubuntu had been checked lots of times in various tests)
-FROM ubuntu:22.04
-
-# ARG for quick switch to a given ubuntu mirror
-ARG apt_archive="http://archive.ubuntu.com"
-RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
-
-ENV LANG=C.UTF-8
-ENV TZ=Europe/Moscow
-RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+ARG FROM_TAG=latest
+FROM clickhouse/test-base:$FROM_TAG
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
@@ -56,10 +45,9 @@ COPY * /
# node #0 should be less stable because of system interruptions. We bind
# randomly to node 1 or 0 to gather some statistics on that. We have to bind
# both servers and the tmpfs on which the database is stored. How to do it
-# through Yandex Sandbox API is unclear, but by default tmpfs uses
+# is unclear, but by default tmpfs uses
# 'process allocation policy', not sure which process but hopefully the one that
-# writes to it, so just bind the downloader script as well. We could also try to
-# remount it with proper options in Sandbox task.
+# writes to it, so just bind the downloader script as well.
# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt
# Double-escaped backslashes are a tribute to the engineering wonder of docker --
# it gives '/bin/sh: 1: [bash,: not found' otherwise.
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 798d2a40b12..4b1b5c13b9b 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -90,7 +90,7 @@ function configure
set +m
wait_for_server $LEFT_SERVER_PORT $left_pid
- echo Server for setup started
+ echo "Server for setup started"
clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||:
clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||:
@@ -156,9 +156,9 @@ function restart
wait_for_server $RIGHT_SERVER_PORT $right_pid
echo right ok
- clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database != 'system'"
+ clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')"
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.build_options"
- clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database != 'system'"
+ clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')"
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.build_options"
# Check again that both servers we started are running -- this is important
@@ -352,14 +352,12 @@ function get_profiles
wait
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > left-query-log.tsv ||: &
- clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > right-query-log.tsv ||: &
- clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: &
@@ -665,9 +663,8 @@ create view partial_query_times as select * from
-- Report for backward-incompatible ('partial') queries that we could only run on the new server (e.g.
-- queries with new functions added in the tested PR).
create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv')
- settings output_format_decimal_trailing_zeros = 1
- as select toDecimal64(time_median, 3) time,
- toDecimal64(time_stddev / time_median, 3) relative_time_stddev,
+ as select round(time_median, 3) time,
+ round(time_stddev / time_median, 3) relative_time_stddev,
test, query_index, query_display_name
from partial_query_times
join query_display_names using (test, query_index)
@@ -739,28 +736,26 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
;
create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv')
- settings output_format_decimal_trailing_zeros = 1
as with
-- server_time is sometimes reported as zero (if it's less than 1 ms),
-- so we have to work around this to not get an error about conversion
-- of NaN to decimal.
(left > right ? left / right : right / left) as times_change_float,
isFinite(times_change_float) as times_change_finite,
- toDecimal64(times_change_finite ? times_change_float : 1., 3) as times_change_decimal,
+ round(times_change_finite ? times_change_float : 1., 3) as times_change_decimal,
times_change_finite
? (left > right ? '-' : '+') || toString(times_change_decimal) || 'x'
: '--' as times_change_str
select
- toDecimal64(left, 3), toDecimal64(right, 3), times_change_str,
- toDecimal64(diff, 3), toDecimal64(stat_threshold, 3),
+ round(left, 3), round(right, 3), times_change_str,
+ round(diff, 3), round(stat_threshold, 3),
changed_fail, test, query_index, query_display_name
from queries where changed_show order by abs(diff) desc;
create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv')
- settings output_format_decimal_trailing_zeros = 1
as select
- toDecimal64(left, 3), toDecimal64(right, 3), toDecimal64(diff, 3),
- toDecimal64(stat_threshold, 3), unstable_fail, test, query_index, query_display_name
+ round(left, 3), round(right, 3), round(diff, 3),
+ round(stat_threshold, 3), unstable_fail, test, query_index, query_display_name
from queries where unstable_show order by stat_threshold desc;
@@ -789,11 +784,10 @@ create view total_speedup as
;
create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv')
- settings output_format_decimal_trailing_zeros = 1
as with
(times_speedup >= 1
- ? '-' || toString(toDecimal64(times_speedup, 3)) || 'x'
- : '+' || toString(toDecimal64(1 / times_speedup, 3)) || 'x')
+ ? '-' || toString(round(times_speedup, 3)) || 'x'
+ : '+' || toString(round(1 / times_speedup, 3)) || 'x')
as times_speedup_str
select test, times_speedup_str, queries, bad, changed, unstable
-- Not sure what's the precedence of UNION ALL vs WHERE & ORDER BY, hence all
@@ -817,11 +811,10 @@ create view total_client_time_per_query as select *
'test text, query_index int, client float, server float');
create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv')
- settings output_format_decimal_trailing_zeros = 1
- as select client, server, toDecimal64(client/server, 3) p,
+ as select client, server, round(client/server, 3) p,
test, query_display_name
from total_client_time_per_query left join query_display_names using (test, query_index)
- where p > toDecimal64(1.02, 3) order by p desc;
+ where p > round(1.02, 3) order by p desc;
create table wall_clock_time_per_test engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
@@ -899,15 +892,14 @@ create view test_times_view_total as
;
create table test_times_report engine File(TSV, 'report/test-times.tsv')
- settings output_format_decimal_trailing_zeros = 1
as select
test,
- toDecimal64(real, 3),
- toDecimal64(total_client_time, 3),
+ round(real, 3),
+ round(total_client_time, 3),
queries,
- toDecimal64(query_max, 3),
- toDecimal64(avg_real_per_query, 3),
- toDecimal64(query_min, 3),
+ round(query_max, 3),
+ round(avg_real_per_query, 3),
+ round(query_min, 3),
runs
from (
select * from test_times_view
@@ -919,21 +911,20 @@ create table test_times_report engine File(TSV, 'report/test-times.tsv')
-- report for all queries page, only main metric
create table all_tests_report engine File(TSV, 'report/all-queries.tsv')
- settings output_format_decimal_trailing_zeros = 1
as with
-- server_time is sometimes reported as zero (if it's less than 1 ms),
-- so we have to work around this to not get an error about conversion
-- of NaN to decimal.
(left > right ? left / right : right / left) as times_change_float,
isFinite(times_change_float) as times_change_finite,
- toDecimal64(times_change_finite ? times_change_float : 1., 3) as times_change_decimal,
+ round(times_change_finite ? times_change_float : 1., 3) as times_change_decimal,
times_change_finite
? (left > right ? '-' : '+') || toString(times_change_decimal) || 'x'
: '--' as times_change_str
select changed_fail, unstable_fail,
- toDecimal64(left, 3), toDecimal64(right, 3), times_change_str,
- toDecimal64(isFinite(diff) ? diff : 0, 3),
- toDecimal64(isFinite(stat_threshold) ? stat_threshold : 0, 3),
+ round(left, 3), round(right, 3), times_change_str,
+ round(isFinite(diff) ? diff : 0, 3),
+ round(isFinite(stat_threshold) ? stat_threshold : 0, 3),
test, query_index, query_display_name
from queries order by test, query_index;
@@ -1044,27 +1035,6 @@ create table unstable_run_traces engine File(TSVWithNamesAndTypes,
order by count() desc
;
-create table metric_devation engine File(TSVWithNamesAndTypes,
- 'report/metric-deviation.$version.tsv')
- settings output_format_decimal_trailing_zeros = 1
- -- first goes the key used to split the file with grep
- as select test, query_index, query_display_name,
- toDecimal64(d, 3) d, q, metric
- from (
- select
- test, query_index,
- (q[3] - q[1])/q[2] d,
- quantilesExact(0, 0.5, 1)(value) q, metric
- from (select * from unstable_run_metrics
- union all select * from unstable_run_traces
- union all select * from unstable_run_metrics_2) mm
- group by test, query_index, metric
- having isFinite(d) and d > 0.5 and q[3] > 5
- ) metrics
- left join query_display_names using (test, query_index)
- order by test, query_index, d desc
- ;
-
create table stacks engine File(TSV, 'report/stacks.$version.tsv') as
select
-- first goes the key used to split the file with grep
@@ -1173,9 +1143,8 @@ create table metrics engine File(TSV, 'metrics/metrics.tsv') as
-- Show metrics that have changed
create table changes engine File(TSV, 'metrics/changes.tsv')
- settings output_format_decimal_trailing_zeros = 1
as select metric, left, right,
- toDecimal64(diff, 3), toDecimal64(times_diff, 3)
+ round(diff, 3), round(times_diff, 3)
from (
select metric, median(left) as left, median(right) as right,
(right - left) / left diff,
@@ -1226,7 +1195,6 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv')
'$SHA_TO_TEST' :: LowCardinality(String) AS commit_sha,
'${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME:-Performance}' :: LowCardinality(String) AS check_name,
'$(sed -n 's/.*/\1/p' report.html)' :: LowCardinality(String) AS check_status,
- -- TODO toDateTime() can't parse output of 'date', so no time for now.
(($(date +%s) - $CHPC_CHECK_START_TIMESTAMP) * 1000) :: UInt64 AS check_duration_ms,
fromUnixTimestamp($CHPC_CHECK_START_TIMESTAMP) check_start_time,
test_name :: LowCardinality(String) AS test_name ,
diff --git a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml
index 39c29bb61ca..292665c4f68 100644
--- a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml
+++ b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml
@@ -19,31 +19,6 @@
-
-
- ENGINE = Memory
-
-
-
- ENGINE = Memory
-
-
-
- ENGINE = Memory
-
-
-
- ENGINE = Memory
-
-
-
- ENGINE = Memory
-
-
-
100000000010
diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml
index 093834943a3..dab41adca51 100644
--- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml
+++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml
@@ -3,7 +3,7 @@
11
- 1
+ 0
Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day
```
-
## trace_log {#server_configuration_parameters-trace_log}
Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.
@@ -1875,6 +1983,12 @@ Parameters:
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
+- `max_size_rows` – Maximum size of the logs in rows. When the number of non-flushed log entries reaches `max_size_rows`, the logs are dumped to the disk.
+Default: 1048576.
+- `reserved_size_rows` – Pre-allocated memory size in rows for the logs.
+Default: 8192.
+- `buffer_size_rows_flush_threshold` – Threshold in rows; when the number of buffered log entries reaches it, flushing the logs to the disk is started in the background.
+Default: `max_size_rows / 2`.
- `storage_policy` – Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
@@ -1886,6 +2000,10 @@ The default server configuration file `config.xml` contains the following settin
     <table>trace_log</table>
     <partition_by>toYYYYMM(event_date)</partition_by>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
```
@@ -1900,9 +2018,18 @@ Parameters:
- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined.
- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
+- `max_size_rows` – Maximum size of the logs in rows. When the number of non-flushed log entries reaches `max_size_rows`, the logs are dumped to the disk.
+Default: 1048576.
+- `reserved_size_rows` – Pre-allocated memory size in rows for the logs.
+Default: 8192.
+- `buffer_size_rows_flush_threshold` – Threshold in rows; when the number of buffered log entries reaches it, flushing the logs to the disk is started in the background.
+Default: `max_size_rows / 2`.
+- `flush_on_crash` - Indicates whether the logs should be dumped to the disk in case of a crash.
+Default: false.
- `storage_policy` – Name of storage policy to use for the table (optional)
**Example**
+
```xml
@@ -1910,11 +2037,53 @@ Parameters:
     <table>asynchronous_insert_log</table>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
     <partition_by>toYYYYMM(event_date)</partition_by>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
```
+## crash_log {#server_configuration_parameters-crash_log}
+
+Settings for the [crash_log](../../operations/system-tables/crash-log.md) system table operation.
+
+Parameters:
+
+- `database` — Database for storing a table.
+- `table` — Table name.
+- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined.
+- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
+- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined.
+- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
+- `max_size_rows` – Maximum size of the logs in rows. When the number of non-flushed log entries reaches `max_size_rows`, the logs are dumped to the disk.
+Default: 1048576.
+- `reserved_size_rows` – Pre-allocated memory size in rows for the logs.
+Default: 8192.
+- `buffer_size_rows_flush_threshold` – Threshold in rows; when the number of buffered log entries reaches it, flushing the logs to the disk is started in the background.
+Default: `max_size_rows / 2`.
+- `flush_on_crash` - Indicates whether the logs should be dumped to the disk in case of a crash.
+Default: false.
+- `storage_policy` – Name of storage policy to use for the table (optional)
+- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
+
+The default server configuration file `config.xml` contains the following settings section:
+
+``` xml
+<clickhouse>
+    <crash_log>
+        <database>system</database>
+        <table>crash_log</table>
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <max_size_rows>1024</max_size_rows>
+        <reserved_size_rows>1024</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>512</buffer_size_rows_flush_threshold>
+        <flush_on_crash>false</flush_on_crash>
+    </crash_log>
+</clickhouse>
+```
+
## query_masking_rules {#query-masking-rules}
Regexp-based rules, which will be applied to queries as well as all log messages before storing them in server logs,
@@ -2119,6 +2288,8 @@ This section contains the following parameters:
- `session_timeout_ms` — Maximum timeout for the client session in milliseconds.
- `operation_timeout_ms` — Maximum timeout for one operation in milliseconds.
- `root` — The [znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes) that is used as the root for znodes used by the ClickHouse server. Optional.
+- `fallback_session_lifetime.min` - If the first zookeeper host resolved by the zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of the zookeeper hosts. This setting sets the minimum duration of the fallback session. Set in seconds. Optional. Default is 3 hours.
+- `fallback_session_lifetime.max` - If the first zookeeper host resolved by the zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of the zookeeper hosts. This setting sets the maximum duration of the fallback session. Set in seconds. Optional. Default is 6 hours.
- `identity` — User and password, that can be required by ZooKeeper to give access to requested znodes. Optional.
- zookeeper_load_balancing - Specifies the algorithm of ZooKeeper node selection.
* random - randomly selects one of ZooKeeper nodes.
diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md
index eb1d5db5676..86d24c3a942 100644
--- a/docs/en/operations/settings/index.md
+++ b/docs/en/operations/settings/index.md
@@ -7,90 +7,20 @@ pagination_next: en/operations/settings/settings
# Settings Overview
-There are multiple ways to define ClickHouse settings. Settings are configured in layers, and each subsequent layer redefines the previous values of a setting.
+:::note
+XML-based Settings Profiles and [configuration files](https://clickhouse.com/docs/en/operations/configuration-files) are currently not supported for ClickHouse Cloud. To specify settings for your ClickHouse Cloud service, you must use [SQL-driven Settings Profiles](https://clickhouse.com/docs/en/operations/access-rights#settings-profiles-management).
+:::
-The order of priority for defining a setting is:
+There are two main groups of ClickHouse settings:
-1. Settings in the `users.xml` server configuration file
+- Global server settings
+- Query-level settings
- - Set in the element ``.
+The main distinction between global server settings and query-level settings is that
+global server settings must be set in configuration files while query-level settings
+can be set in configuration files or with SQL queries.
-2. Session settings
+Read about [global server settings](/docs/en/operations/server-configuration-parameters/settings.md) to learn more about configuring your ClickHouse server at the global server level.
- - Send `SET setting=value` from the ClickHouse console client in interactive mode.
- Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you need to specify the `session_id` HTTP parameter.
+Read about [query-level settings](/docs/en/operations/settings/settings-query-level.md) to learn more about configuring your ClickHouse server at the query level.
-3. Query settings
-
- - When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`.
- - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`).
- - Define settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of the SELECT query. The setting value is applied only to that query and is reset to the default or previous value after the query is executed.
-
-View the [Settings](./settings.md) page for a description of the ClickHouse settings.
-
-## Converting a Setting to its Default Value
-
-If you change a setting and would like to revert it back to its default value, set the value to `DEFAULT`. The syntax looks like:
-
-```sql
-SET setting_name = DEFAULT
-```
-
-For example, the default value of `max_insert_block_size` is 1048449. Suppose you change its value to 100000:
-
-```sql
-SET max_insert_block_size=100000;
-
-SELECT value FROM system.settings where name='max_insert_block_size';
-```
-
-The response is:
-
-```response
-┌─value──┐
-│ 100000 │
-└────────┘
-```
-
-The following command sets its value back to 1048449:
-
-```sql
-SET max_insert_block_size=DEFAULT;
-
-SELECT value FROM system.settings where name='max_insert_block_size';
-```
-
-The setting is now back to its default:
-
-```response
-┌─value───┐
-│ 1048449 │
-└─────────┘
-```
-
-
-## Custom Settings {#custom_settings}
-
-In addition to the common [settings](../../operations/settings/settings.md), users can define custom settings.
-
-A custom setting name must begin with one of predefined prefixes. The list of these prefixes must be declared in the [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) parameter in the server configuration file.
-
-```xml
-custom_
-```
-
-To define a custom setting use `SET` command:
-
-```sql
-SET custom_a = 123;
-```
-
-To get the current value of a custom setting use `getSetting()` function:
-
-```sql
-SELECT getSetting('custom_a');
-```
-
-**See Also**
-
-- [Server Configuration Settings](../../operations/server-configuration-parameters/settings.md)
diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md
index 4122b4af40f..8ea599b9861 100644
--- a/docs/en/operations/settings/merge-tree-settings.md
+++ b/docs/en/operations/settings/merge-tree-settings.md
@@ -56,11 +56,11 @@ Possible values:
- Any positive integer.
-Default value: 300.
+Default value: 3000.
To achieve maximum performance of `SELECT` queries, it is necessary to minimize the number of parts processed, see [Merge Tree](../../development/architecture.md#merge-tree).
-You can set a larger value to 600 (1200), this will reduce the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. Also in case of a merge issue (for example, due to insufficient disk space) you will notice it later than it could be with the original 300.
+Prior to 23.6 this setting was set to 300. You can set a different, higher value; it will reduce the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. Also, in case of a merge issue (for example, due to insufficient disk space) you will notice it later than you would with the original 300.
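
If the higher limit is needed only for specific tables, it can be changed per table; a minimal sketch (the table name `events` is hypothetical):

```sql
-- Raise the threshold for one existing MergeTree table (hypothetical name).
ALTER TABLE events MODIFY SETTING parts_to_throw_insert = 600;

-- Inspect the global default.
SELECT name, value FROM system.merge_tree_settings WHERE name = 'parts_to_throw_insert';
```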
## parts_to_delay_insert {#parts-to-delay-insert}
diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md
index 163ed5d5826..14d612be831 100644
--- a/docs/en/operations/settings/query-complexity.md
+++ b/docs/en/operations/settings/query-complexity.md
@@ -298,7 +298,7 @@ Default value: `THROW`.
- [JOIN clause](../../sql-reference/statements/select/join.md#select-join)
- [Join table engine](../../engines/table-engines/special/join.md)
-## max_partitions_per_insert_block {#max-partitions-per-insert-block}
+## max_partitions_per_insert_block {#settings-max_partitions_per_insert_block}
Limits the maximum number of partitions in a single inserted block.
@@ -309,9 +309,18 @@ Default value: 100.
**Details**
-When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse throws an exception with the following text:
+When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse either logs a warning or throws an exception based on `throw_on_max_partitions_per_insert_block`. Exceptions have the following text:
-> “Too many partitions for single INSERT block (more than” + toString(max_parts) + “). The limit is controlled by ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).”
+> “Too many partitions for a single INSERT block (`partitions_count` partitions, limit is ” + toString(max_partitions) + “). The limit is controlled by the ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).”
+
+## throw_on_max_partitions_per_insert_block {#settings-throw_on_max_partition_per_insert_block}
+
+Allows you to control the behaviour when `max_partitions_per_insert_block` is reached.
+
+- `true` - When an insert block reaches `max_partitions_per_insert_block`, an exception is raised.
+- `false` - Logs a warning when `max_partitions_per_insert_block` is reached.
+
+Default value: `true`.
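
A sketch of the two settings used together; the table and data are hypothetical:

```sql
CREATE TABLE partitioned_demo (d Date, x UInt32)
ENGINE = MergeTree PARTITION BY d ORDER BY x;

-- The inserted block spans 10 partitions; with throw_on_max_partitions_per_insert_block = 0
-- the server only logs a warning instead of failing the INSERT.
INSERT INTO partitioned_demo
SETTINGS max_partitions_per_insert_block = 5,
         throw_on_max_partitions_per_insert_block = 0
SELECT today() - number, number FROM numbers(10);
```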
## max_temporary_data_on_disk_size_for_user {#settings_max_temporary_data_on_disk_size_for_user}
@@ -327,3 +336,39 @@ The maximum amount of data consumed by temporary files on disk in bytes for all
Zero means unlimited.
Default value: 0.
+
+## max_sessions_for_user {#max-sessions-per-user}
+
+Maximum number of simultaneous sessions per authenticated user to the ClickHouse server.
+
+Example:
+
+``` xml
+
+
+ 1
+
+
+ 2
+
+
+ 0
+
+
+
+
+
+ single_session_user
+
+
+
+ two_sessions_profile
+
+
+
+ unlimited_sessions_profile
+
+
+```
+
+Default value: 0 (infinite number of simultaneous sessions).
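
If SQL-driven settings profiles are in use, the same limit can presumably be attached there as well; a minimal sketch (profile and user names are hypothetical):

```sql
-- Hypothetical profile limiting each assigned user to two concurrent sessions
-- (assumes the setting is accepted in SQL-driven profiles).
CREATE SETTINGS PROFILE two_sessions_profile SETTINGS max_sessions_for_user = 2;
CREATE USER limited_user IDENTIFIED BY 'password' SETTINGS PROFILE two_sessions_profile;

-- Check the value currently in effect for the session.
SELECT value FROM system.settings WHERE name = 'max_sessions_for_user';
```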
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index 1b22a6d1223..beb1d372e08 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -242,6 +242,26 @@ See also:
- [DateTime data type.](../../sql-reference/data-types/datetime.md)
- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md)
+## interval_output_format {#interval_output_format}
+
+Allows choosing different output formats of the text representation of interval types.
+
+Possible values:
+
+- `kusto` - KQL-style output format.
+
+ ClickHouse outputs intervals in [KQL format](https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-timespan-format-strings#the-constant-c-format-specifier). For example, `toIntervalDay(2)` would be formatted as `2.00:00:00`. Please note that for interval types of varying length (ie. `IntervalMonth` and `IntervalYear`) the average number of seconds per interval is taken into account.
+
+- `numeric` - Numeric output format.
+
+ ClickHouse outputs intervals as their underlying numeric representation. For example, `toIntervalDay(2)` would be formatted as `2`.
+
+Default value: `numeric`.
+
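For instance, the same interval rendered in both modes (a sketch):

```sql
SELECT toIntervalDay(2) SETTINGS interval_output_format = 'numeric'; -- 2
SELECT toIntervalDay(2) SETTINGS interval_output_format = 'kusto';   -- 2.00:00:00
```
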
+See also:
+
+- [Interval](../../sql-reference/data-types/special-data-types/interval.md)
+
## input_format_ipv4_default_on_conversion_error {#input_format_ipv4_default_on_conversion_error}
Deserialization of IPv4 will use default values instead of throwing exception on conversion error.
@@ -301,6 +321,10 @@ If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` ar
This parameter is useful when you are using formats that require a schema definition, such as [Cap’n Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format.
+## output_format_schema {#output-format-schema}
+
+The path to the file where the automatically generated schema will be saved in [Cap’n Proto](../../interfaces/formats.md#capnproto-capnproto) or [Protobuf](../../interfaces/formats.md#protobuf-protobuf) formats.
+
## output_format_enable_streaming {#output_format_enable_streaming}
Enable streaming in output formats that support it.
@@ -931,6 +955,11 @@ Result
```text
" string "
```
+### input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns}
+
+Ignore extra columns in CSV input (if the file has more columns than expected) and treat missing fields in CSV input as default values.
+
+Disabled by default.
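
A sketch using the `format` table function (assuming the three-argument form that takes an explicit structure), with one row that has an extra column and one row that has a missing column:

```sql
SELECT *
FROM format(CSV, 'a UInt32, b String', '1,hello,extra\n2\n')
SETTINGS input_format_csv_allow_variable_number_of_columns = 1;

-- Row 1: a = 1, b = 'hello' (the extra column is ignored)
-- Row 2: a = 2, b = ''      (the missing column gets its default value)
```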
### input_format_csv_allow_whitespace_or_tab_as_delimiter {#input_format_csv_allow_whitespace_or_tab_as_delimiter}
@@ -964,6 +993,28 @@ Result
a b
```
+### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values}
+
+Allows setting the default value for a column when CSV field deserialization fails on a bad value.
+
+Default value: `false`.
+
+**Examples**
+
+Query
+
+```bash
+./clickhouse local -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x"
+echo 'a,b,c' | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV"
+./clickhouse local -q "select * from test_tbl"
+```
+
+Result
+
+```text
+a 0 1971-01-01
+```
+
## Values format settings {#values-format-settings}
### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
@@ -1061,17 +1112,6 @@ Default value: 1.
## Arrow format settings {#arrow-format-settings}
-### input_format_arrow_import_nested {#input_format_arrow_import_nested}
-
-Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format.
-
-Possible values:
-
-- 0 — Data can not be inserted into `Nested` columns as an array of structs.
-- 1 — Data can be inserted into `Nested` columns as an array of structs.
-
-Default value: `0`.
-
### input_format_arrow_case_insensitive_column_matching {#input_format_arrow_case_insensitive_column_matching}
Ignore case when matching Arrow column names with ClickHouse column names.
@@ -1117,21 +1157,10 @@ Enabled by default.
Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed)
-Default value: `none`.
+Default value: `lz4_frame`.
## ORC format settings {#orc-format-settings}
-### input_format_orc_import_nested {#input_format_orc_import_nested}
-
-Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format.
-
-Possible values:
-
-- 0 — Data can not be inserted into `Nested` columns as an array of structs.
-- 1 — Data can be inserted into `Nested` columns as an array of structs.
-
-Default value: `0`.
-
### input_format_orc_row_batch_size {#input_format_orc_row_batch_size}
Batch size when reading ORC stripes.
@@ -1170,17 +1199,6 @@ Default value: `none`.
## Parquet format settings {#parquet-format-settings}
-### input_format_parquet_import_nested {#input_format_parquet_import_nested}
-
-Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format.
-
-Possible values:
-
-- 0 — Data can not be inserted into `Nested` columns as an array of structs.
-- 1 — Data can be inserted into `Nested` columns as an array of structs.
-
-Default value: `0`.
-
### input_format_parquet_case_insensitive_column_matching {#input_format_parquet_case_insensitive_column_matching}
Ignore case when matching Parquet column names with ClickHouse column names.
@@ -1283,6 +1301,11 @@ When serializing Nullable columns with Google wrappers, serialize default values
Disabled by default.
+### format_protobuf_use_autogenerated_schema {#format_protobuf_use_autogenerated_schema}
+
+Use the autogenerated Protobuf schema when [format_schema](#formatschema-format-schema) is not set.
+The schema is generated from the ClickHouse table structure using the function [structureToProtobufSchema](../../sql-reference/functions/other-functions.md#structure_to_protobuf_schema).
+
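A sketch combining it with `output_format_schema` (documented earlier on this page); the output path is hypothetical:

```sql
-- Write the autogenerated schema to a file while producing Protobuf output.
SELECT number AS id, toString(number) AS name
FROM numbers(3)
SETTINGS format_protobuf_use_autogenerated_schema = 1,
         output_format_schema = '/tmp/numbers.proto'
FORMAT Protobuf;
```
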
## Avro format settings {#avro-format-settings}
### input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields}
@@ -1300,6 +1323,17 @@ Default value: 0.
Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format.
+Format:
+``` text
+http://[user:password@]machine[:port]
+```
+
+Examples:
+``` text
+http://registry.example.com:8081
+http://admin:secret@registry.example.com:8081
+```
+
Default value: `Empty`.
### output_format_avro_codec {#output_format_avro_codec}
@@ -1568,6 +1602,11 @@ Possible values:
Default value: `'by_values'`.
+### format_capn_proto_use_autogenerated_schema {#format_capn_proto_use_autogenerated_schema}
+
+Use the autogenerated CapnProto schema when [format_schema](#formatschema-format-schema) is not set.
+The schema is generated from the ClickHouse table structure using the function [structureToCapnProtoSchema](../../sql-reference/functions/other-functions.md#structure_to_capnproto_schema).
+
## MySQLDump format settings {#musqldump-format-settings}
### input_format_mysql_dump_table_name (#input_format_mysql_dump_table_name)
diff --git a/docs/en/operations/settings/settings-profiles.md b/docs/en/operations/settings/settings-profiles.md
index 2f39a75453c..d08266b8ef8 100644
--- a/docs/en/operations/settings/settings-profiles.md
+++ b/docs/en/operations/settings/settings-profiles.md
@@ -39,7 +39,7 @@ Example:
8
-
+
1000000000100000000000
@@ -67,6 +67,8 @@ Example:
50100
+ 4
+
1
diff --git a/docs/en/operations/settings/settings-query-level.md b/docs/en/operations/settings/settings-query-level.md
new file mode 100644
index 00000000000..81cc2294a4c
--- /dev/null
+++ b/docs/en/operations/settings/settings-query-level.md
@@ -0,0 +1,217 @@
+---
+sidebar_label: Query-level Settings
+title: Query-level Settings
+slug: /en/operations/settings/query-level
+---
+
+There are multiple ways to set ClickHouse query-level settings. Settings are configured in layers, and each subsequent layer redefines the previous values of a setting.
+
+The order of priority for defining a setting is:
+
+1. Applying a setting to a user directly, or within a settings profile
+
+ - SQL (recommended)
+ - adding one or more XML or YAML files to `/etc/clickhouse-server/users.d`
+
+2. Session settings
+
+ - Send `SET setting=value` from the ClickHouse Cloud SQL console or
+ `clickhouse client` in interactive mode. Similarly, you can use ClickHouse
+ sessions in the HTTP protocol. To do this, you need to specify the
+ `session_id` HTTP parameter.
+
+3. Query settings
+
+ - When starting `clickhouse client` in non-interactive mode, set the startup
+ parameter `--setting=value`.
+ - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`).
+ - Define settings in the
+ [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query)
+ clause of the SELECT query. The setting value is applied only to that query
+ and is reset to the default or previous value after the query is executed.
+
+## Examples
+
+These examples all set the value of the `async_insert` setting to `1`, and
+show how to examine the settings in a running system.
+
+### Using SQL to apply a setting to a user directly
+
+This creates the user `ingester` with the setting `async_insert = 1`:
+
+```sql
+CREATE USER ingester
+IDENTIFIED WITH sha256_hash BY '7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3'
+# highlight-next-line
+SETTINGS async_insert = 1
+```
+
+#### Examine the settings profile and assignment
+
+```sql
+SHOW ACCESS
+```
+
+```response
+┌─ACCESS─────────────────────────────────────────────────────────────────────────────┐
+│ ... │
+# highlight-next-line
+│ CREATE USER ingester IDENTIFIED WITH sha256_password SETTINGS async_insert = true │
+│ ... │
+└────────────────────────────────────────────────────────────────────────────────────┘
+```
+### Using SQL to create a settings profile and assign to a user
+
+This creates the profile `log_ingest` with the setting `async_insert = 1`:
+
+```sql
+CREATE
+SETTINGS PROFILE log_ingest SETTINGS async_insert = 1
+```
+
+This creates the user `ingester` and assigns the user the settings profile `log_ingest`:
+
+```sql
+CREATE USER ingester
+IDENTIFIED WITH sha256_hash BY '7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3'
+# highlight-next-line
+SETTINGS PROFILE log_ingest
+```
+
+
+### Using XML to create a settings profile and user
+
+```xml title=/etc/clickhouse-server/users.d/users.xml
+<clickhouse>
+# highlight-start
+    <profiles>
+        <log_ingest>
+            <async_insert>1</async_insert>
+        </log_ingest>
+    </profiles>
+# highlight-end
+
+    <users>
+        <ingester>
+            <password_sha256_hex>7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3</password_sha256_hex>
+# highlight-start
+            <profile>log_ingest</profile>
+# highlight-end
+        </ingester>
+        <default>
+            <password_sha256_hex>7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3</password_sha256_hex>
+            <access_management>1</access_management>
+            <named_collection_control>1</named_collection_control>
+        </default>
+    </users>
+</clickhouse>
+```
+
+#### Examine the settings profile and assignment
+
+```sql
+SHOW ACCESS
+```
+
+```response
+┌─ACCESS─────────────────────────────────────────────────────────────────────────────┐
+│ CREATE USER default IDENTIFIED WITH sha256_password │
+# highlight-next-line
+│ CREATE USER ingester IDENTIFIED WITH sha256_password SETTINGS PROFILE log_ingest │
+│ CREATE SETTINGS PROFILE default │
+# highlight-next-line
+│ CREATE SETTINGS PROFILE log_ingest SETTINGS async_insert = true │
+│ CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1 │
+│ ... │
+└────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+### Assign a setting to a session
+
+```sql
+SET async_insert =1;
+SELECT value FROM system.settings where name='async_insert';
+```
+
+```response
+┌─value──┐
+│ 1 │
+└────────┘
+```
+
+### Assign a setting during a query
+
+```sql
+INSERT INTO YourTable
+# highlight-next-line
+SETTINGS async_insert=1
+VALUES (...)
+```
+
+
+## Converting a Setting to its Default Value
+
+If you change a setting and would like to revert it back to its default value, set the value to `DEFAULT`. The syntax looks like:
+
+```sql
+SET setting_name = DEFAULT
+```
+
+For example, the default value of `async_insert` is `0`. Suppose you change its value to `1`:
+
+```sql
+SET async_insert = 1;
+
+SELECT value FROM system.settings where name='async_insert';
+```
+
+The response is:
+
+```response
+┌─value──┐
+│ 1 │
+└────────┘
+```
+
+The following command sets its value back to 0:
+
+```sql
+SET async_insert = DEFAULT;
+
+SELECT value FROM system.settings where name='async_insert';
+```
+
+The setting is now back to its default:
+
+```response
+┌─value───┐
+│ 0 │
+└─────────┘
+```
+
+## Custom Settings {#custom_settings}
+
+In addition to the common [settings](../../operations/settings/settings.md), users can define custom settings.
+
+A custom setting name must begin with one of predefined prefixes. The list of these prefixes must be declared in the [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) parameter in the server configuration file.
+
+```xml
+custom_
+```
+
+To define a custom setting use `SET` command:
+
+```sql
+SET custom_a = 123;
+```
+
+To get the current value of a custom setting use `getSetting()` function:
+
+```sql
+SELECT getSetting('custom_a');
+```
+
+**See Also**
+
+- View the [Settings](./settings.md) page for a description of the ClickHouse settings.
+- [Global server settings](../../operations/server-configuration-parameters/settings.md)
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 8b969f87a4d..29434ef556f 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -98,6 +98,18 @@ Default value: 0.
```
+## mutations_execute_nondeterministic_on_initiator {#mutations_execute_nondeterministic_on_initiator}
+
+If true, constant nondeterministic functions (e.g. the function `now()`) are executed on the initiator and replaced with literals in `UPDATE` and `DELETE` queries. This helps keep data in sync on replicas while executing mutations with constant nondeterministic functions. Default value: `false`.
+
+## mutations_execute_subqueries_on_initiator {#mutations_execute_subqueries_on_initiator}
+
+If true, scalar subqueries are executed on the initiator and replaced with literals in `UPDATE` and `DELETE` queries. Default value: `false`.
+
+## mutations_max_literal_size_to_replace {#mutations_max_literal_size_to_replace}
+
+The maximum size of a serialized literal in bytes to replace in `UPDATE` and `DELETE` queries. Takes effect only if at least one of the two settings above is enabled. Default value: 16384 (16 KiB).
+
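A sketch of a mutation that benefits from these settings; the table name is hypothetical:

```sql
-- now() is evaluated once on the initiator and written as the same literal on every replica.
ALTER TABLE user_events
    UPDATE updated_at = now() WHERE 1
    SETTINGS mutations_execute_nondeterministic_on_initiator = 1;
```
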
## distributed_product_mode {#distributed-product-mode}
Changes the behaviour of [distributed subqueries](../../sql-reference/operators/in.md).
@@ -537,6 +549,8 @@ Possible values:
The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row. Any rows which don’t belong to the current bucket are flushed and reassigned.
+ Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`.
+
- hash
[Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
@@ -3466,6 +3480,12 @@ Possible values:
Default value: `0`.
+## enable_url_encoding {#enable_url_encoding}
+
+Allows enabling/disabling decoding/encoding of the path in the URI for [URL](../../engines/table-engines/special/url.md) engine tables.
+
+Enabled by default.
+
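A sketch; the URL is hypothetical, and the point is only that `%20` in the path is passed through untouched when the setting is disabled:

```sql
SELECT *
FROM url('https://example.com/path%20with%20spaces.csv', CSVWithNames)
SETTINGS enable_url_encoding = 0;
```
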
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.
@@ -4290,7 +4310,7 @@ Use this setting only for backward compatibility if your use cases depend on old
## session_timezone {#session_timezone}
Sets the implicit time zone of the current session or query.
-The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone.
+The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone.
The setting takes precedence over the globally configured (server-level) implicit time zone.
A value of '' (empty string) means that the implicit time zone of the current session or query is equal to the [server time zone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone).
@@ -4325,7 +4345,7 @@ SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zuric
```
:::warning
-Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors.
+Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors.
See the following example and explanation.
:::
@@ -4524,6 +4544,7 @@ This setting allows to specify renaming pattern for files processed by `file` ta
### Placeholders
+- `%a` — Full original filename (e.g., "sample.csv").
- `%f` — Original filename without extension (e.g., "sample").
- `%e` — Original file extension with dot (e.g., ".csv").
- `%t` — Timestamp (in microseconds).
@@ -4569,3 +4590,39 @@ Type: Int64
Default: 0
+## rewrite_count_distinct_if_with_count_distinct_implementation
+
+Allows you to rewrite `countDistinctIf` with the [count_distinct_implementation](#settings-count_distinct_implementation) setting.
+
+Possible values:
+
+- true — Allow.
+- false — Disallow.
+
+Default value: `false`.
+
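A sketch; with the setting enabled, the `countDistinctIf` call is rewritten to the aggregate function chosen by `count_distinct_implementation`:

```sql
SELECT countDistinctIf(number % 10, number % 2 = 0)
FROM numbers(1000)
SETTINGS rewrite_count_distinct_if_with_count_distinct_implementation = 1,
         count_distinct_implementation = 'uniqCombined';
```
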
+## precise_float_parsing {#precise_float_parsing}
+
+Switches [Float32/Float64](../../sql-reference/data-types/float.md) parsing algorithms:
+* If the value is `1`, then the precise method is used. It is slower than the fast method, but it always returns a number that is the closest machine-representable number to the input.
+* Otherwise, the fast method is used (default). It usually returns the same value as the precise method, but in rare cases the result may differ by one or two least significant digits.
+
+Possible values: `0`, `1`.
+
+Default value: `0`.
+
+Example:
+
+```sql
+SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0;
+
+┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
+│ 1.7090999999999998 │ 15008753.000000002 │
+└─────────────────────┴──────────────────────────┘
+
+SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1;
+
+┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
+│ 1.7091 │ 15008753 │
+└─────────────────────┴──────────────────────────┘
+```
diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md
index f357341da67..e46b495239c 100644
--- a/docs/en/operations/system-tables/asynchronous_metrics.md
+++ b/docs/en/operations/system-tables/asynchronous_metrics.md
@@ -32,6 +32,10 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
└─────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
+
+
## Metric descriptions
@@ -483,6 +487,14 @@ The value is similar to `OSUserTime` but divided to the number of CPU cores to b
Number of threads in the server of the PostgreSQL compatibility protocol.
+### QueryCacheBytes
+
+Total size of the query cache in bytes.
+
+### QueryCacheEntries
+
+Total number of entries in the query cache.
+
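A quick way to read both metrics (sketch):

```sql
SELECT metric, value
FROM system.asynchronous_metrics
WHERE metric IN ('QueryCacheBytes', 'QueryCacheEntries');
```
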
### ReplicasMaxAbsoluteDelay
Maximum difference in seconds between the most fresh replicated part and the most fresh data part still to be replicated, across Replicated tables. A very high value indicates a replica with no data.
diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md
index ba5602ee292..d2b90a49b0d 100644
--- a/docs/en/operations/system-tables/events.md
+++ b/docs/en/operations/system-tables/events.md
@@ -10,6 +10,9 @@ Columns:
- `event` ([String](../../sql-reference/data-types/string.md)) — Event name.
- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred.
- `description` ([String](../../sql-reference/data-types/string.md)) — Event description.
+- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `event`.
+
+You can find all supported events in the source file [src/Common/ProfileEvents.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ProfileEvents.cpp).
**Example**
diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md
index 1b720098fc7..a46f306f677 100644
--- a/docs/en/operations/system-tables/index.md
+++ b/docs/en/operations/system-tables/index.md
@@ -47,6 +47,10 @@ An example:
ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024
-->
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
```
diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md
index d8539908bf7..557835ce3b6 100644
--- a/docs/en/operations/system-tables/merge_tree_settings.md
+++ b/docs/en/operations/system-tables/merge_tree_settings.md
@@ -7,11 +7,17 @@ Contains information about settings for `MergeTree` tables.
Columns:
-- `name` (String) — Setting name.
-- `value` (String) — Setting value.
-- `description` (String) — Setting description.
-- `type` (String) — Setting type (implementation specific string value).
-- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed.
+- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name.
+- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value.
+- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Whether the setting was explicitly defined in the config or explicitly changed.
+- `description` ([String](../../sql-reference/data-types/string.md)) — Setting description.
+- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
+- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
+- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting:
+ - `0` — Current user can change the setting.
+ - `1` — Current user can’t change the setting.
+- `type` ([String](../../sql-reference/data-types/string.md)) — Setting type (implementation specific string value).
+- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is obsolete.
**Example**
```sql
@@ -21,35 +27,51 @@ SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
```response
Row 1:
──────
+name: min_compress_block_size
+value: 0
+changed: 0
+description: When granule is written, compress the data in buffer if the size of pending uncompressed data is larger or equal than the specified threshold. If this setting is not set, the corresponding global setting is used.
+min: ᴺᵁᴸᴸ
+max: ᴺᵁᴸᴸ
+readonly: 0
+type: UInt64
+is_obsolete: 0
+
+Row 2:
+──────
+name: max_compress_block_size
+value: 0
+changed: 0
+description: Compress the pending uncompressed data in buffer if its size is larger or equal than the specified threshold. Block of data will be compressed even if the current granule is not finished. If this setting is not set, the corresponding global setting is used.
+min: ᴺᵁᴸᴸ
+max: ᴺᵁᴸᴸ
+readonly: 0
+type: UInt64
+is_obsolete: 0
+
+Row 3:
+──────
name: index_granularity
value: 8192
changed: 0
description: How many rows correspond to one primary key value.
-type: SettingUInt64
-
-Row 2:
-──────
-name: min_bytes_for_wide_part
-value: 0
-changed: 0
-description: Minimal uncompressed size in bytes to create part in wide format instead of compact
-type: SettingUInt64
-
-Row 3:
-──────
-name: min_rows_for_wide_part
-value: 0
-changed: 0
-description: Minimal number of rows to create part in wide format instead of compact
-type: SettingUInt64
+min: ᴺᵁᴸᴸ
+max: ᴺᵁᴸᴸ
+readonly: 0
+type: UInt64
+is_obsolete: 0
Row 4:
──────
-name: merge_max_block_size
-value: 8192
+name: max_digestion_size_per_segment
+value: 268435456
changed: 0
-description: How many rows in blocks should be formed for merge operations.
-type: SettingUInt64
+description: Max number of bytes to digest per segment to build GIN index.
+min: ᴺᵁᴸᴸ
+max: ᴺᵁᴸᴸ
+readonly: 0
+type: UInt64
+is_obsolete: 0
-4 rows in set. Elapsed: 0.001 sec.
+4 rows in set. Elapsed: 0.009 sec.
```
diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md
index 5a7dfd03eb4..ae0e7620d35 100644
--- a/docs/en/operations/system-tables/metrics.md
+++ b/docs/en/operations/system-tables/metrics.md
@@ -10,8 +10,9 @@ Columns:
- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value.
- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description.
+- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `metric`.
-The list of supported metrics you can find in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse.
+You can find all supported metrics in the source file [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp).
**Example**
diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md
index 5829e5ad313..8113b850a38 100644
--- a/docs/en/operations/system-tables/parts.md
+++ b/docs/en/operations/system-tables/parts.md
@@ -39,6 +39,8 @@ Columns:
- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
+- `primary_key_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) used by primary key values in the primary.idx/cidx file on disk.
+
- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks.
- `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included.
diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md
index b9fdd19c643..7143520835f 100644
--- a/docs/en/operations/system-tables/query_log.md
+++ b/docs/en/operations/system-tables/query_log.md
@@ -48,7 +48,7 @@ Columns:
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends it’s `read_rows` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of rows read at all replicas. Each replica sends it’s `read_bytes` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
-- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
+- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes (uncompressed). For other queries, the column value is 0.
- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query.
- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result.
- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.
@@ -111,6 +111,11 @@ Columns:
- `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution.
- `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution.
- `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution.
+- `query_cache_usage` ([Enum8](../../sql-reference/data-types/enum.md)) — Usage of the [query cache](../query-cache.md) during query execution. Values:
+ - `'Unknown'` = Status unknown.
+ - `'None'` = The query result was neither written into nor read from the query cache.
+ - `'Write'` = The query result was written into the query cache.
+ - `'Read'` = The query result was read from the query cache.
**Example**
@@ -186,6 +191,7 @@ used_formats: []
used_functions: []
used_storages: []
used_table_functions: []
+query_cache_usage: None
```
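
A sketch for checking how recent queries interacted with the query cache:

```sql
SELECT event_time, query, query_cache_usage
FROM system.query_log
WHERE type = 'QueryFinish' AND query_cache_usage != 'None'
ORDER BY event_time DESC
LIMIT 5;
```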
**See Also**
diff --git a/docs/en/operations/system-tables/server_settings.md b/docs/en/operations/system-tables/server_settings.md
index 3085b1acaf4..df482261ae8 100644
--- a/docs/en/operations/system-tables/server_settings.md
+++ b/docs/en/operations/system-tables/server_settings.md
@@ -14,6 +14,7 @@ Columns:
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml`
- `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description.
- `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type.
+- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is obsolete.
**Example**
@@ -26,14 +27,22 @@ WHERE name LIKE '%thread_pool%'
```
``` text
-┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐
-│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │
-│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │
-│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │
-│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │
-│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │
-│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │
-└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘
+┌─name────────────────────────────────────────_─value─_─default─_─changed─_─description──────────────────────────────────────────────────────────────────────────────────────────────────────
+───────────────────────────────────_─type───_─is_obsolete─┐
+│ max_thread_pool_size │ 10000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ 0 │
+│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ 0 │
+│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ 0 │
+│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ 0 │
+│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ 0 │
+│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ 0 │
+│ max_active_parts_loading_thread_pool_size │ 64 │ 64 │ 0 │ The number of threads to load active set of data parts (Active ones) at startup. │ UInt64 │ 0 │
+│ max_outdated_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup. │ UInt64 │ 0 │
+│ max_parts_cleaning_thread_pool_size │ 128 │ 128 │ 0 │ The number of threads for concurrent removal of inactive data parts. │ UInt64 │ 0 │
+│ max_backups_io_thread_pool_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that would be used for IO operations for BACKUP queries │ UInt64 │ 0 │
+│ max_backups_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for backups IO thread pool. │ UInt64 │ 0 │
+│ backups_io_thread_pool_queue_size │ 0 │ 0 │ 0 │ Queue size for backups IO thread pool. │ UInt64 │ 0 │
+└─────────────────────────────────────────────┴───────┴─────────┴─────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────
+───────────────────────────────────┴────────┴─────────────┘
```
Using of `WHERE changed` can be useful, for example, when you want to check
diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md
index afae45077cc..7dd2345a2d0 100644
--- a/docs/en/operations/system-tables/settings.md
+++ b/docs/en/operations/system-tables/settings.md
@@ -17,6 +17,7 @@ Columns:
- `0` — Current user can change the setting.
- `1` — Current user can’t change the setting.
- `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value.
+- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is obsolete.
**Example**
@@ -29,11 +30,14 @@ WHERE name LIKE '%min_i%'
```
``` text
-┌─name────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┐
-│ min_insert_block_size_rows │ 1048576 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
-│ min_insert_block_size_bytes │ 268435456 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
-│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
-└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘
+┌─name───────────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┬─type─────────┬─default───┬─alias_for─┬─is_obsolete─┐
+│ min_insert_block_size_rows │ 1048449 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 1048449 │ │ 0 │
+│ min_insert_block_size_bytes │ 268402944 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 268402944 │ │ 0 │
+│ min_insert_block_size_rows_for_materialized_views │ 0 │ 0 │ Like min_insert_block_size_rows, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_rows) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 0 │ │ 0 │
+│ min_insert_block_size_bytes_for_materialized_views │ 0 │ 0 │ Like min_insert_block_size_bytes, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_bytes) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 0 │ │ 0 │
+│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ Milliseconds │ 1000 │ │ 0 │
+└────────────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┴──────────────┴───────────┴───────────┴─────────────┘
```
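+
+For illustration, a sketch of a query (using the `is_obsolete` column described above) that lists settings currently marked as obsolete:
+
+```sql
+SELECT name, value
+FROM system.settings
+WHERE is_obsolete = 1
+```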
Using `WHERE changed` can be useful, for example, when you want to check:
diff --git a/docs/en/operations/utilities/clickhouse-keeper-client.md b/docs/en/operations/utilities/clickhouse-keeper-client.md
index 77f816fe428..2140d22b620 100644
--- a/docs/en/operations/utilities/clickhouse-keeper-client.md
+++ b/docs/en/operations/utilities/clickhouse-keeper-client.md
@@ -11,7 +11,7 @@ A client application to interact with clickhouse-keeper by its native protocol.
- `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode.
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`.
-- `-p N`, `--port=N` — Server port. Default value: 2181
+- `-p N`, `--port=N` — Server port. Default value: 9181
- `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s.
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.
@@ -21,8 +21,8 @@ A client application to interact with clickhouse-keeper by its native protocol.
## Example {#clickhouse-keeper-client-example}
```bash
-./clickhouse-keeper-client -h localhost:2181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
-Connected to ZooKeeper at [::1]:2181 with session_id 137
+./clickhouse-keeper-client -h localhost:9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
+Connected to ZooKeeper at [::1]:9181 with session_id 137
/ :) ls
keeper foo bar
/ :) cd keeper
@@ -45,9 +45,14 @@ keeper foo bar
- `ls [path]` -- Lists the nodes for the given path (default: cwd)
- `cd [path]` -- Change the working path (default `.`)
- `set <path> <value> [version]` -- Updates the node's value. Only update if version matches (default: -1)
-- `create <path> <value>` -- Creates new node
+- `create <path> <value> [mode]` -- Creates new node with the set value
+- `touch <path>` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists
- `get <path>` -- Returns the node's value
- `remove <path>` -- Remove the node
- `rmr <path>` -- Recursively deletes path. Confirmation required
- `flwc <command>` -- Executes four-letter-word command
- `help` -- Prints this message
+- `get_stat [path]` -- Returns the node's stat (default `.`)
+- `find_super_nodes <threshold> [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
+- `delete_stale_backups` -- Deletes ClickHouse nodes used for backups that are now inactive
+- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)
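+
+For illustration, a possible interactive session using some of the new commands (the node names and threshold are hypothetical):
+
+```bash
+/ :) get_stat /keeper
+/ :) find_super_nodes 1000 /keeper
+/ :) find_big_family /keeper 5
+/ :) delete_stale_backups
+```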
diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md
index 0443a80cf17..0b17afb7e12 100644
--- a/docs/en/operations/utilities/clickhouse-local.md
+++ b/docs/en/operations/utilities/clickhouse-local.md
@@ -34,7 +34,13 @@ The binary you just downloaded can run all sorts of ClickHouse tools and utiliti
A common use of `clickhouse-local` is to run ad-hoc queries on files: where you don't have to insert the data into a table. `clickhouse-local` can stream the data from a file into a temporary table and execute your SQL.
-If the file is sitting on the same machine as `clickhouse-local`, use the `file` table engine. The following `reviews.tsv` file contains a sampling of Amazon product reviews:
+If the file is sitting on the same machine as `clickhouse-local`, you can simply specify the file to load. The following `reviews.tsv` file contains a sampling of Amazon product reviews:
+
+```bash
+./clickhouse local -q "SELECT * FROM 'reviews.tsv'"
+```
+
+This command is a shortcut for:
```bash
./clickhouse local -q "SELECT * FROM file('reviews.tsv')"
diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md
index a395b350a55..18ff5073e3f 100644
--- a/docs/en/sql-reference/aggregate-functions/combinators.md
+++ b/docs/en/sql-reference/aggregate-functions/combinators.md
@@ -300,7 +300,7 @@ SELECT groupArrayResample(30, 75, 30)(name, age) FROM people
Consider the results.
-`Jonh` is out of the sample because he’s too young. Other people are distributed according to the specified age intervals.
+`John` is out of the sample because he’s too young. Other people are distributed according to the specified age intervals.
Now let’s count the total number of people and their average wage in the specified age intervals.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md
index db19f524b31..f79fe66c05d 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/any.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/any.md
@@ -12,3 +12,5 @@ To get a determinate result, you can use the ‘min’ or ‘max’ function ins
In some cases, you can rely on the order of execution. This applies to cases when SELECT comes from a subquery that uses ORDER BY.
When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function.
+
+- Alias: `any_value`
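+
+A minimal sketch of the alias in use (illustrative only; `any` returns an indeterminate value, so the result is not guaranteed):
+
+```sql
+SELECT any_value(number) FROM numbers(10);
+```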
diff --git a/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md b/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md
new file mode 100644
index 00000000000..3c71129bdb5
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md
@@ -0,0 +1,32 @@
+---
+slug: /en/sql-reference/aggregate-functions/reference/array_concat_agg
+sidebar_position: 110
+---
+
+# array_concat_agg
+- Alias of `groupArrayArray`. The function is case insensitive.
+
+**Example**
+
+```text
+SELECT *
+FROM t
+
+┌─a───────┐
+│ [1,2,3] │
+│ [4,5] │
+│ [6] │
+└─────────┘
+
+```
+
+Query:
+
+```sql
+SELECT array_concat_agg(a) AS a
+FROM t
+
+┌─a─────────────┐
+│ [1,2,3,4,5,6] │
+└───────────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md
index 18048fa4f71..ad678443df6 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md
@@ -44,3 +44,5 @@ Result:
```
The groupArray function will remove ᴺᵁᴸᴸ value based on the above results.
+
+- Alias: `array_agg`.
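+
+A minimal sketch of the alias in use (hypothetical data from the `numbers` table function):
+
+```sql
+SELECT array_agg(number) FROM numbers(3);
+```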
diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md
index 0da273e01ad..c99c8791542 100644
--- a/docs/en/sql-reference/data-types/datetime.md
+++ b/docs/en/sql-reference/data-types/datetime.md
@@ -140,8 +140,9 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse
- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md)
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
-- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format)
-- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format)
+- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format)
+- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
+- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
- [The `Date` data type](../../sql-reference/data-types/date.md)
diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md
index 793691850b1..3b80e8b1a8b 100644
--- a/docs/en/sql-reference/data-types/datetime64.md
+++ b/docs/en/sql-reference/data-types/datetime64.md
@@ -119,6 +119,7 @@ FROM dt;
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
+- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-for-working-with-dates-and-times)
- [`Date` data type](../../sql-reference/data-types/date.md)
- [`DateTime` data type](../../sql-reference/data-types/datetime.md)
diff --git a/docs/en/sql-reference/data-types/tuple.md b/docs/en/sql-reference/data-types/tuple.md
index dfe0eda2e21..3add754e6b6 100644
--- a/docs/en/sql-reference/data-types/tuple.md
+++ b/docs/en/sql-reference/data-types/tuple.md
@@ -4,7 +4,7 @@ sidebar_position: 54
sidebar_label: Tuple(T1, T2, ...)
---
-# Tuple(t1, T2, …)
+# Tuple(T1, T2, …)
A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types). Tuple must contain at least one element.
diff --git a/docs/en/sql-reference/data-types/uuid.md b/docs/en/sql-reference/data-types/uuid.md
index b0f19f0d8be..40f756b9588 100644
--- a/docs/en/sql-reference/data-types/uuid.md
+++ b/docs/en/sql-reference/data-types/uuid.md
@@ -6,42 +6,42 @@ sidebar_label: UUID
# UUID
-A universally unique identifier (UUID) is a 16-byte number used to identify records. For detailed information about the UUID, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier).
+A Universally Unique Identifier (UUID) is a 16-byte value used to identify records. For detailed information about UUIDs, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier).
-The example of UUID type value is represented below:
+While different UUID variants exist (see [here](https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis)), ClickHouse does not validate that inserted UUIDs conform to a particular variant. UUIDs are internally treated as a sequence of 16 random bytes with [8-4-4-4-12 representation](https://en.wikipedia.org/wiki/Universally_unique_identifier#Textual_representation) at SQL level.
+
+Example UUID value:
``` text
61f0c404-5cb3-11e7-907b-a6006ad3dba0
```
-If you do not specify the UUID column value when inserting a new record, the UUID value is filled with zero:
+The default UUID is all-zero. It is used, for example, when a new record is inserted but no value for a UUID column is specified:
``` text
00000000-0000-0000-0000-000000000000
```
-## How to Generate
+## Generating UUIDs
-To generate the UUID value, ClickHouse provides the [generateUUIDv4](../../sql-reference/functions/uuid-functions.md) function.
+ClickHouse provides the [generateUUIDv4](../../sql-reference/functions/uuid-functions.md) function to generate random UUID version 4 values.
## Usage Example
**Example 1**
-This example demonstrates creating a table with the UUID type column and inserting a value into the table.
+This example demonstrates the creation of a table with a UUID column and the insertion of a value into the table.
``` sql
CREATE TABLE t_uuid (x UUID, y String) ENGINE=TinyLog
-```
-``` sql
INSERT INTO t_uuid SELECT generateUUIDv4(), 'Example 1'
-```
-``` sql
SELECT * FROM t_uuid
```
+Result:
+
``` text
┌────────────────────────────────────x─┬─y─────────┐
│ 417ddc5d-e556-4d27-95dd-a34d84e46a50 │ Example 1 │
@@ -50,13 +50,11 @@ SELECT * FROM t_uuid
**Example 2**
-In this example, the UUID column value is not specified when inserting a new record.
+In this example, no UUID column value is specified when the record is inserted, i.e. the default UUID value is inserted:
``` sql
INSERT INTO t_uuid (y) VALUES ('Example 2')
-```
-``` sql
SELECT * FROM t_uuid
```
diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md
index 6c3d80683db..dd8031461e0 100644
--- a/docs/en/sql-reference/dictionaries/index.md
+++ b/docs/en/sql-reference/dictionaries/index.md
@@ -1092,7 +1092,7 @@ Types of sources (`source_type`):
- [Local file](#local_file)
- [Executable File](#executable)
- [Executable Pool](#executable_pool)
-- [HTTP(s)](#http)
+- [HTTP(S)](#http)
- DBMS
- [ODBC](#odbc)
- [MySQL](#mysql)
@@ -1102,7 +1102,7 @@ Types of sources (`source_type`):
- [Cassandra](#cassandra)
- [PostgreSQL](#postgresql)
-## Local File {#local_file}
+### Local File {#local_file}
Example of settings:
@@ -1132,7 +1132,7 @@ When a dictionary with source `FILE` is created via DDL command (`CREATE DICTION
- [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function)
-## Executable File {#executable}
+### Executable File {#executable}
Working with executable files depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data.
@@ -1161,7 +1161,7 @@ Setting fields:
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
-## Executable Pool {#executable_pool}
+### Executable Pool {#executable_pool}
Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts.
@@ -1196,9 +1196,9 @@ Setting fields:
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node.
-## Http(s) {#https}
+### HTTP(S) {#https}
-Working with an HTTP(s) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method.
+Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method.
Example of settings:
@@ -1248,7 +1248,55 @@ Setting fields:
When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remote hosts for HTTP dictionaries are checked against the contents of `remote_url_allow_hosts` section from config to prevent database users to access arbitrary HTTP server.
-### Known Vulnerability of the ODBC Dictionary Functionality
+### DBMS
+
+#### ODBC
+
+You can use this method to connect to any database that has an ODBC driver.
+
+Example of settings:
+
+``` xml
+
+```
+
+or
+
+``` sql
+SOURCE(ODBC(
+ db 'DatabaseName'
+ table 'SchemaName.TableName'
+ connection_string 'DSN=some_parameters'
+ invalidate_query 'SQL_QUERY'
+ query 'SELECT id, value_1, value_2 FROM db_name.table_name'
+))
+```
+
+Setting fields:
+
+- `db` – Name of the database. Omit it if the database name is set in the `<connection_string>` parameters.
+- `table` – Name of the table and schema if exists.
+- `connection_string` – Connection string.
+- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
+- `query` – The custom query. Optional parameter.
+
+:::note
+The `table` and `query` fields cannot be used together, and exactly one of them must be declared.
+:::
+
+ClickHouse receives quoting symbols from the ODBC driver and quotes all settings in queries to the driver, so it’s necessary to set the table name according to the table name case in the database.
+
+If you have problems with encodings when using Oracle, see the corresponding [FAQ](/knowledgebase/oracle-odbc) item.
+
+##### Known Vulnerability of the ODBC Dictionary Functionality
:::note
When connecting to the database through the ODBC driver connection parameter `Servername` can be substituted. In this case values of `USERNAME` and `PASSWORD` from `odbc.ini` are sent to the remote server and can be compromised.
@@ -1277,7 +1325,7 @@ SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db');
ODBC driver will send values of `USERNAME` and `PASSWORD` from `odbc.ini` to `some-server.com`.
-### Example of Connecting Postgresql
+##### Example of Connecting PostgreSQL
Ubuntu OS.
@@ -1358,7 +1406,7 @@ LIFETIME(MIN 300 MAX 360)
You may need to edit `odbc.ini` to specify the full path to the library with the driver `DRIVER=/usr/local/lib/psqlodbcw.so`.
-### Example of Connecting MS SQL Server
+##### Example of Connecting MS SQL Server
Ubuntu OS.
@@ -1462,55 +1510,7 @@ LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 360)
```
-## DBMS
-
-### ODBC
-
-You can use this method to connect any database that has an ODBC driver.
-
-Example of settings:
-
-``` xml
-
-```
-
-or
-
-``` sql
-SOURCE(ODBC(
- db 'DatabaseName'
- table 'SchemaName.TableName'
- connection_string 'DSN=some_parameters'
- invalidate_query 'SQL_QUERY'
- query 'SELECT id, value_1, value_2 FROM db_name.table_name'
-))
-```
-
-Setting fields:
-
-- `db` – Name of the database. Omit it if the database name is set in the `` parameters.
-- `table` – Name of the table and schema if exists.
-- `connection_string` – Connection string.
-- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
-- `query` – The custom query. Optional parameter.
-
-:::note
-The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared.
-:::
-
-ClickHouse receives quoting symbols from ODBC-driver and quote all settings in queries to driver, so it’s necessary to set table name accordingly to table name case in database.
-
-If you have a problems with encodings when using Oracle, see the corresponding [FAQ](/knowledgebase/oracle-odbc) item.
-
-### Mysql
+#### Mysql
Example of settings:
@@ -1627,7 +1627,7 @@ SOURCE(MYSQL(
))
```
-### ClickHouse
+#### ClickHouse
Example of settings:
@@ -1680,7 +1680,7 @@ Setting fields:
The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::
-### Mongodb
+#### Mongodb
Example of settings:
@@ -1723,7 +1723,7 @@ Setting fields:
- `options` - MongoDB connection string options (optional parameter).
-### Redis
+#### Redis
Example of settings:
@@ -1756,7 +1756,7 @@ Setting fields:
- `storage_type` – The structure of internal Redis storage using for work with keys. `simple` is for simple sources and for hashed single key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex key are unsupported. May be omitted, default value is `simple`.
- `db_index` – The specific numeric index of Redis logical database. May be omitted, default value is 0.
-### Cassandra
+#### Cassandra
Example of settings:
@@ -1798,7 +1798,7 @@ Setting fields:
The `column_family` or `where` fields cannot be used together with the `query` field. And either one of the `column_family` or `query` fields must be declared.
:::
-### PostgreSQL
+#### PostgreSQL
Example of settings:
@@ -1855,7 +1855,7 @@ Setting fields:
The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::
-## Null
+### Null
A special source that can be used to create dummy (empty) dictionaries. Such dictionaries can be useful for tests or in setups with separated data and query nodes, on nodes with Distributed tables.
@@ -2476,52 +2476,3 @@ Dictionary updates (other than loading at first use) do not block queries. Durin
We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server.
There are also functions for working with OS identifiers and search engines, but they shouldn’t be used.
-
-## Embedded Dictionaries
-
-
-
-ClickHouse contains a built-in feature for working with a geobase.
-
-This allows you to:
-
-- Use a region’s ID to get its name in the desired language.
-- Use a region’s ID to get the ID of a city, area, federal district, country, or continent.
-- Check whether a region is part of another region.
-- Get a chain of parent regions.
-
-All the functions support “translocality,” the ability to simultaneously use different perspectives on region ownership. For more information, see the section “Functions for working with web analytics dictionaries”.
-
-The internal dictionaries are disabled in the default package.
-To enable them, uncomment the parameters `path_to_regions_hierarchy_file` and `path_to_regions_names_files` in the server configuration file.
-
-The geobase is loaded from text files.
-
-Place the `regions_hierarchy*.txt` files into the `path_to_regions_hierarchy_file` directory. This configuration parameter must contain the path to the `regions_hierarchy.txt` file (the default regional hierarchy), and the other files (`regions_hierarchy_ua.txt`) must be located in the same directory.
-
-Put the `regions_names_*.txt` files in the `path_to_regions_names_files` directory.
-
-You can also create these files yourself. The file format is as follows:
-
-`regions_hierarchy*.txt`: TabSeparated (no header), columns:
-
-- region ID (`UInt32`)
-- parent region ID (`UInt32`)
-- region type (`UInt8`): 1 - continent, 3 - country, 4 - federal district, 5 - region, 6 - city; other types do not have values
-- population (`UInt32`) — optional column
-
-`regions_names_*.txt`: TabSeparated (no header), columns:
-
-- region ID (`UInt32`)
-- region name (`String`) — Can’t contain tabs or line feeds, even escaped ones.
-
-A flat array is used for storing in RAM. For this reason, IDs shouldn’t be more than a million.
-
-Dictionaries can be updated without restarting the server. However, the set of available dictionaries is not updated.
-For updates, the file modification times are checked. If a file has changed, the dictionary is updated.
-The interval to check for changes is configured in the `builtin_dictionaries_reload_interval` parameter.
-Dictionary updates (other than loading at first use) do not block queries. During updates, queries use the old versions of dictionaries. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries.
-
-We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server.
-
-There are also functions for working with OS identifiers and search engines, but they shouldn’t be used.
diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md
index 64fae0e82f0..69f1816b7df 100644
--- a/docs/en/sql-reference/functions/arithmetic-functions.md
+++ b/docs/en/sql-reference/functions/arithmetic-functions.md
@@ -6,9 +6,20 @@ sidebar_label: Arithmetic
# Arithmetic Functions
-The result type of all arithmetic functions is the smallest type which can represent all possible results. Size promotion happens for integers up to 32 bit, e.g. `UInt8 + UInt16 = UInt32`. If one of the inters has 64 or more bits, the result is of the same type as the bigger of the input integers, e.g. `UInt16 + UInt128 = UInt128`. While this introduces a risk of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of 64 bit.
+Arithmetic functions work for any two operands of type `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64`.
-The result of addition or multiplication of two integers is unsigned unless one of the integers is signed.
+Before performing the operation, both operands are cast to the result type. The result type is determined as follows (unless specified
+differently in the function documentation below):
+- If both operands are up to 32 bits wide, the size of the result type will be the size of the next bigger type following the bigger of the
+ two operands (integer size promotion). For example, `UInt8 + UInt16 = UInt32` or `Float32 * Float32 = Float64`.
+- If one of the operands has 64 or more bits, the size of the result type will be the same size as the bigger of the two operands. For
+ example, `UInt32 + UInt128 = UInt128` or `Float32 * Float64 = Float64`.
+- If one of the operands is signed, the result type will also be signed, otherwise it will be unsigned. For example, `UInt32 * Int32 = Int64`.
+
+These rules make sure that the result type will be the smallest type which can represent all possible results. While this introduces a risk
+of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of
+64 bit. This behavior also guarantees compatibility with many other databases which provide 64 bit integers (BIGINT) as the biggest integer
+type.
Example:
@@ -22,8 +33,6 @@ SELECT toTypeName(0), toTypeName(0 + 0), toTypeName(0 + 0 + 0), toTypeName(0 + 0
└───────────────┴────────────────────────┴─────────────────────────────────┴──────────────────────────────────────────┘
```
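+
+As a further illustration of the signedness rule above, a sketch which should report `Int64` per `UInt32 * Int32 = Int64`:
+
+```sql
+SELECT toTypeName(toUInt32(1) * toInt32(1));
+```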
-Arithmetic functions work for any pair of `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64` values.
-
Overflows are produced the same way as in C++.
## plus
@@ -68,7 +77,7 @@ Alias: `a \* b` (operator)
## divide
-Calculates the quotient of two values `a` and `b`. The result is always a floating-point value. If you need integer division, you can use the `intDiv` function.
+Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../../sql-reference/data-types/float.md). Integer division is provided by the `intDiv` function.
Division by 0 returns `inf`, `-inf`, or `nan`.
@@ -84,7 +93,7 @@ Alias: `a / b` (operator)
Performs an integer division of two values `a` by `b`, i.e. computes the quotient rounded down to the next smallest integer.
-The result has the same type as the dividend (the first parameter).
+The result has the same width as the dividend (the first parameter).
An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one.
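+
+A small sketch contrasting the two kinds of division:
+
+```sql
+SELECT divide(10, 3) AS float_quotient, intDiv(10, 3) AS int_quotient;
+-- float_quotient is a Float64 value (approximately 3.33), int_quotient is the integer 3
+```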
@@ -135,7 +144,7 @@ intDivOrZero(a, b)
Calculates the remainder of the division of two values `a` by `b`.
-The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result is a floating-point number.
+The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../../sql-reference/data-types/float.md).
The remainder is computed like in C++. Truncated division is used for negative numbers.
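+
+A sketch illustrating truncated division for negative numbers (C++ semantics, as stated above):
+
+```sql
+SELECT modulo(7, 3) AS a, modulo(-7, 3) AS b;
+-- a = 1, b = -1
+```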
diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 7f2b8f3c605..f66994ed5a6 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -102,6 +102,8 @@ The function also works for strings.
Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM TABLE`.
+Alias: `OCTET_LENGTH`
+
## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64
## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64
@@ -142,6 +144,7 @@ range([start, ] end [, step])
- All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments.
- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting.
+- Returns Null if any argument has Nullable(Nothing) type. An exception is thrown if any argument has Null value (Nullable(T) type).
**Examples**
@@ -180,9 +183,8 @@ arrayConcat(arrays)
**Arguments**
- `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type.
- **Example**
-
+**Example**
``` sql
SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res
@@ -878,7 +880,7 @@ A special function. See the section [“ArrayJoin function”](../../sql-referen
## arrayDifference
-Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`).
+Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`).
**Syntax**
@@ -996,6 +998,24 @@ SELECT
└──────────────┴───────────┘
```
+## arrayJaccardIndex
+
+Returns the [Jaccard index](https://en.wikipedia.org/wiki/Jaccard_index) of two arrays.
+
+**Example**
+
+Query:
+``` sql
+SELECT arrayJaccardIndex([1, 2], [2, 3]) AS res
+```
+
+Result:
+``` text
+┌─res────────────────┐
+│ 0.3333333333333333 │
+└────────────────────┘
+```
+
## arrayReduce
Applies an aggregate function to array elements and returns its result. The name of the aggregation function is passed as a string in single quotes `'max'`, `'sum'`. When using parametric aggregate functions, the parameter is indicated after the function name in parentheses `'uniqUpTo(6)'`.
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 3f61e7a214d..87d84425029 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -694,10 +694,14 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we
Returns year and week for a date. The year in the result may be different from the year in the date argument for the first and the last week of the year.
-The mode argument works exactly like the mode argument to `toWeek()`. For the single-argument syntax, a mode value of 0 is used.
+The mode argument works like the mode argument to `toWeek()`. For the single-argument syntax, a mode value of 0 is used.
`toISOYear()` is a compatibility function that is equivalent to `intDiv(toYearWeek(date,3),100)`.
+:::warning
+The week number returned by `toYearWeek()` can be different from what `toWeek()` returns. `toWeek()` always returns the week number in the context of the given year, and if `toWeek()` returns `0`, `toYearWeek()` returns the value corresponding to the last week of the previous year. See `prev_yearWeek` in the example below.
+:::
+
**Syntax**
``` sql
@@ -707,18 +711,18 @@ toYearWeek(t[, mode[, timezone]])
**Example**
``` sql
-SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9;
+SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9, toYearWeek(toDate('2022-01-01')) AS prev_yearWeek;
```
``` text
-┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┐
-│ 2016-12-27 │ 201652 │ 201652 │ 201701 │
-└────────────┴───────────┴───────────┴───────────┘
+┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┬─prev_yearWeek─┐
+│ 2016-12-27 │ 201652 │ 201652 │ 201701 │ 202152 │
+└────────────┴───────────┴───────────┴───────────┴───────────────┘
```
## age
-Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second.
+Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond.
E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit.
For an alternative to `age`, see function `date\_diff`.
@@ -734,6 +738,8 @@ age('unit', startdate, enddate, [timezone])
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
Possible values:
+ - `microsecond` (possible abbreviations: `us`, `u`)
+ - `millisecond` (possible abbreviations: `ms`)
- `second` (possible abbreviations: `ss`, `s`)
- `minute` (possible abbreviations: `mi`, `n`)
- `hour` (possible abbreviations: `hh`, `h`)
@@ -809,6 +815,8 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
Possible values:
+ - `microsecond` (possible abbreviations: `us`, `u`)
+ - `millisecond` (possible abbreviations: `ms`)
- `second` (possible abbreviations: `ss`, `s`)
- `minute` (possible abbreviations: `mi`, `n`)
- `hour` (possible abbreviations: `hh`, `h`)
@@ -1130,6 +1138,8 @@ Result:
Returns the current date and time at the moment of query analysis. The function is a constant expression.
+Alias: `current_timestamp`.
+
**Syntax**
``` sql
@@ -1260,6 +1270,8 @@ Result:
Accepts zero arguments and returns the current date at one of the moments of query analysis.
The same as ‘toDate(now())’.
+Aliases: `curdate`, `current_date`.
+
## yesterday
Accepts zero arguments and returns yesterday’s date at one of the moments of query analysis.
@@ -1437,7 +1449,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %n | new-line character (‘’) | |
| %p | AM or PM designation | PM |
| %Q | Quarter (1-4) | 1 |
-| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%i %p | 10:30 PM |
+| %r | 12-hour HH:MM AM/PM time, equivalent to %h:%i %p | 10:30 PM |
| %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 |
| %s | second (00-59) | 44 |
| %S | second (00-59) | 44 |
diff --git a/docs/en/sql-reference/functions/files.md b/docs/en/sql-reference/functions/files.md
index 73d72aa50e5..d62cd1db88d 100644
--- a/docs/en/sql-reference/functions/files.md
+++ b/docs/en/sql-reference/functions/files.md
@@ -18,7 +18,7 @@ file(path[, default])
**Arguments**
-- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
+- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).
**Example**
diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 26dcccfd42e..bfbd26551d3 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -2552,3 +2552,187 @@ Result:
This function can be used together with [generateRandom](../../sql-reference/table-functions/generate.md) to generate completely random tables.
+## structureToCapnProtoSchema {#structure_to_capn_proto_schema}
+
+Converts ClickHouse table structure to CapnProto schema.
+
+**Syntax**
+
+``` sql
+structureToCapnProtoSchema(structure[, root_struct_name])
+```
+
+**Arguments**
+
+- `structure` — Table structure in the format `column1_name column1_type, column2_name column2_type, ...`.
+- `root_struct_name` — Name for the root struct in the CapnProto schema. Optional, default value: `Message`.
+
+**Returned value**
+
+- CapnProto schema
+
+Type: [String](../../sql-reference/data-types/string.md).
+
+**Examples**
+
+Query:
+
+``` sql
+SELECT structureToCapnProtoSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB
+```
+
+Result:
+
+``` text
+@0xf96402dd754d0eb7;
+
+struct Message
+{
+ column1 @0 : Data;
+ column2 @1 : UInt32;
+ column3 @2 : List(Data);
+}
+```
+
+Query:
+
+``` sql
+SELECT structureToCapnProtoSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB
+```
+
+Result:
+
+``` text
+@0xd1c8320fecad2b7f;
+
+struct Message
+{
+ struct Column1
+ {
+ union
+ {
+ value @0 : Data;
+ null @1 : Void;
+ }
+ }
+ column1 @0 : Column1;
+ struct Column2
+ {
+ element1 @0 : UInt32;
+ element2 @1 : List(Data);
+ }
+ column2 @1 : Column2;
+ struct Column3
+ {
+ struct Entry
+ {
+ key @0 : Data;
+ value @1 : Data;
+ }
+ entries @0 : List(Entry);
+ }
+ column3 @2 : Column3;
+}
+```
+
+Query:
+
+``` sql
+SELECT structureToCapnProtoSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB
+```
+
+Result:
+
+``` text
+@0x96ab2d4ab133c6e1;
+
+struct Root
+{
+ column1 @0 : Data;
+ column2 @1 : UInt32;
+}
+```
+
+## structureToProtobufSchema {#structure_to_protobuf_schema}
+
+Converts ClickHouse table structure to Protobuf schema.
+
+**Syntax**
+
+``` sql
+structureToProtobufSchema(structure[, root_message_name])
+```
+
+**Arguments**
+
+- `structure` — Table structure in the format `column1_name column1_type, column2_name column2_type, ...`.
+- `root_message_name` — Name for the root message in the Protobuf schema. Optional, default value: `Message`.
+
+**Returned value**
+
+- Protobuf schema
+
+Type: [String](../../sql-reference/data-types/string.md).
+
+**Examples**
+
+Query:
+
+``` sql
+SELECT structureToProtobufSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB
+```
+
+Result:
+
+``` text
+syntax = "proto3";
+
+message Message
+{
+ bytes column1 = 1;
+ uint32 column2 = 2;
+ repeated bytes column3 = 3;
+}
+```
+
+Query:
+
+``` sql
+SELECT structureToProtobufSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB
+```
+
+Result:
+
+``` text
+syntax = "proto3";
+
+message Message
+{
+ bytes column1 = 1;
+ message Column2
+ {
+ uint32 element1 = 1;
+ repeated bytes element2 = 2;
+ }
+ Column2 column2 = 2;
+ map<string, bytes> column3 = 3;
+}
+```
+
+Query:
+
+``` sql
+SELECT structureToProtobufSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB
+```
+
+Result:
+
+``` text
+syntax = "proto3";
+
+message Root
+{
+ bytes column1 = 1;
+ uint32 column2 = 2;
+}
+```
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 5175bbf0615..e39bdbae06e 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -90,6 +90,8 @@ Returns the length of a string in bytes (not: in characters or Unicode code poin
The function also works for arrays.
+Alias: `OCTET_LENGTH`
+
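+A minimal sketch of the alias (byte length, not character length):
+
+```sql
+SELECT OCTET_LENGTH('Hello'), OCTET_LENGTH('中国');
+-- 5 and 6: '中国' is two code points encoded as 6 UTF-8 bytes
+```
+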
## lengthUTF8
Returns the length of a string in Unicode code points (not: in bytes or characters). It assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
@@ -573,6 +575,42 @@ Alias:
Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
+## substringIndex(s, delim, count)
+
+Returns the substring of `s` before `count` occurrences of the delimiter `delim`, as in Spark or MySQL.
+
+**Syntax**
+
+```sql
+substringIndex(s, delim, count)
+```
+Alias: `SUBSTRING_INDEX`
+
+
+**Arguments**
+
+- s: The string to extract substring from. [String](../../sql-reference/data-types/string.md).
+- delim: The character to split on. [String](../../sql-reference/data-types/string.md).
+- count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
+
+**Example**
+
+``` sql
+SELECT substringIndex('www.clickhouse.com', '.', 2)
+```
+
+Result:
+```
+┌─substringIndex('www.clickhouse.com', '.', 2)─┐
+│ www.clickhouse │
+└──────────────────────────────────────────────┘
+```
+
+## substringIndexUTF8(s, delim, count)
+
+Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
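+A sketch with non-ASCII input (the string is hypothetical; the behavior mirrors `substringIndex`):
+
+```sql
+SELECT substringIndexUTF8('папка.файл.расширение', '.', 2)
+```
+
+which should return everything before the second `.` delimiter, i.e. `папка.файл`.
+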
## appendTrailingCharIfAbsent
Appends character `c` to string `s` if `s` is non-empty and does not end with character `c`.
@@ -691,6 +729,30 @@ Returns whether string `str` ends with `suffix`.
endsWith(str, suffix)
```
+## endsWithUTF8
+
+Returns whether string `str` ends with `suffix`. The difference between `endsWithUTF8` and `endsWith` is that `endsWithUTF8` matches `str` and `suffix` by UTF-8 characters.
+
+**Syntax**
+
+```sql
+endsWithUTF8(str, suffix)
+```
+
+**Example**
+
+``` sql
+SELECT endsWithUTF8('中国', '\xbd'), endsWith('中国', '\xbd')
+```
+
+Result:
+
+```result
+┌─endsWithUTF8('中国', '½')─┬─endsWith('中国', '½')─┐
+│ 0 │ 1 │
+└──────────────────────────┴──────────────────────┘
+```
+
## startsWith
Returns whether string `str` starts with `prefix`.
@@ -707,6 +769,25 @@ startsWith(str, prefix)
SELECT startsWith('Spider-Man', 'Spi');
```
+## startsWithUTF8
+
+Returns whether string `str` starts with `prefix`. The difference between `startsWithUTF8` and `startsWith` is that `startsWithUTF8` matches `str` and `prefix` by UTF-8 characters.
+
+
+**Example**
+
+``` sql
+SELECT startsWithUTF8('中国', '\xe4'), startsWith('中国', '\xe4')
+```
+
+Result:
+
+```result
+┌─startsWithUTF8('中国', '⥩─┬─startsWith('中国', '⥩─┐
+│ 0 │ 1 │
+└────────────────────────────┴────────────────────────┘
+```
+
## trim
Removes the specified characters from the start or end of a string. If not specified otherwise, the function removes whitespace (ASCII-character 32).
@@ -1253,3 +1334,48 @@ Result:
│ A240 │
└──────────────────┘
```
+
+## initcap
+
+Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
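+
+A minimal sketch:
+
+```sql
+SELECT initcap('hello world of CLICKHOUSE')
+```
+
+which should return `Hello World Of Clickhouse`.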
+
+## initcapUTF8
+
+Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
+Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
+
+If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
+
+## firstLine
+
+Returns the first line from a multi-line string.
+
+**Syntax**
+
+```sql
+firstLine(val)
+```
+
+**Arguments**
+
+- `val` - Input value. [String](../data-types/string.md)
+
+**Returned value**
+
+- The first line of the input value or the whole value if there are no line
+ separators. [String](../data-types/string.md)
+
+**Example**
+
+```sql
+select firstLine('foo\nbar\nbaz');
+```
+
+Result:
+
+```result
+┌─firstLine('foo\nbar\nbaz')─┐
+│ foo │
+└────────────────────────────┘
+```
diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index 3d8f89f7295..c10a1036677 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -631,3 +631,53 @@ Result:
│ 100 │ 200 │ 100-200 │ 100 │
└──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘
```
+
+## hasSubsequence
+
+Returns 1 if needle is a subsequence of haystack, or 0 otherwise.
+A subsequence of a string is a sequence that can be derived from the given string by deleting zero or more elements without changing the order of the remaining elements.
+
+
+**Syntax**
+
+``` sql
+hasSubsequence(haystack, needle)
+```
+
+**Arguments**
+
+- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
+
+**Returned values**
+
+- 1, if needle is a subsequence of haystack.
+- 0, otherwise.
+
+Type: `UInt8`.
+
+**Examples**
+
+``` sql
+SELECT hasSubsequence('garbage', 'arg');
+```
+
+Result:
+
+``` text
+┌─hasSubsequence('garbage', 'arg')─┐
+│ 1 │
+└──────────────────────────────────┘
+```
+
+## hasSubsequenceCaseInsensitive
+
+Like [hasSubsequence](#hasSubsequence) but searches case-insensitively.
+
+## hasSubsequenceUTF8
+
+Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings.
+
+## hasSubsequenceCaseInsensitiveUTF8
+
+Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.
\ No newline at end of file
diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md
index 7ed2deaeda6..88e4ac03fdb 100644
--- a/docs/en/sql-reference/functions/tuple-functions.md
+++ b/docs/en/sql-reference/functions/tuple-functions.md
@@ -559,6 +559,29 @@ Result:
└────────────────────────────┘
```
+## tupleConcat
+
+Combines tuples passed as arguments.
+
+``` sql
+tupleConcat(tuples)
+```
+
+**Arguments**
+
+- `tuples` – Arbitrary number of arguments of [Tuple](../../sql-reference/data-types/tuple.md) type.
+
+**Example**
+
+``` sql
+SELECT tupleConcat((1, 2), (3, 4), (true, false)) AS res
+```
+
+``` text
+┌─res──────────────────┐
+│ (1,2,3,4,true,false) │
+└──────────────────────┘
+```
## Distance functions
diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index 214c885bc0e..36f40b37238 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -399,7 +399,11 @@ toDateTime(expr[, time_zone ])
- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [Int](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md).
- `time_zone` — Time zone. [String](/docs/en/sql-reference/data-types/string.md).
-If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp).
+:::note
+If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp).
+If `expr` is a [String](/docs/en/sql-reference/data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time.
+Thus, parsing of short numbers' string representations (up to 4 digits) is explicitly disabled due to ambiguity, e.g. a string `'1999'` may be either a year (an incomplete string representation of a Date / DateTime) or a Unix timestamp. Longer numeric strings are allowed.
+:::
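+
+For illustration, a sketch of the difference (the shown result assumes the UTC timezone; your server timezone may differ):
+
+```sql
+SELECT toDateTime('12345');  -- 5 digits: parsed as a Unix timestamp, 1970-01-01 03:25:45
+SELECT toDateTime('1999');   -- 4 digits: rejected as ambiguous, results in an error
+```
+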
**Returned value**
diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md
index dae2c7dd1d3..6ceb9b5849e 100644
--- a/docs/en/sql-reference/statements/alter/column.md
+++ b/docs/en/sql-reference/statements/alter/column.md
@@ -213,7 +213,7 @@ Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC
Syntax:
```sql
-ALTER TABLE table_name MODIFY column_name REMOVE property;
+ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Example**
diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md
index 7dadc2be5b2..dca34d16f25 100644
--- a/docs/en/sql-reference/statements/alter/index.md
+++ b/docs/en/sql-reference/statements/alter/index.md
@@ -36,6 +36,8 @@ These `ALTER` statements modify entities related to role-based access control:
[ALTER TABLE ... MODIFY COMMENT](/docs/en/sql-reference/statements/alter/comment.md) statement adds, modifies, or removes comments to the table, regardless if it was set before or not.
+[ALTER NAMED COLLECTION](/docs/en/sql-reference/statements/alter/named-collection.md) statement modifies [Named Collections](/docs/en/operations/named-collections.md).
+
## Mutations
`ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE … DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE … UPDATE](/docs/en/sql-reference/statements/alter/update.md). They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that produce new “mutated” versions of parts.
diff --git a/docs/en/sql-reference/statements/alter/named-collection.md b/docs/en/sql-reference/statements/alter/named-collection.md
new file mode 100644
index 00000000000..ac6752127c1
--- /dev/null
+++ b/docs/en/sql-reference/statements/alter/named-collection.md
@@ -0,0 +1,30 @@
+---
+slug: /en/sql-reference/statements/alter/named-collection
+sidebar_label: NAMED COLLECTION
+---
+
+# ALTER NAMED COLLECTION
+
+This query modifies an already existing named collection.
+
+**Syntax**
+
+```sql
+ALTER NAMED COLLECTION [IF EXISTS] name [ON CLUSTER cluster]
+[ SET
+key_name1 = 'some value',
+key_name2 = 'some value',
+key_name3 = 'some value',
+... ] |
+[ DELETE key_name4, key_name5, ... ]
+```
+
+**Example**
+
+```sql
+CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
+
+ALTER NAMED COLLECTION foobar SET a = '2', c = '3';
+
+ALTER NAMED COLLECTION foobar DELETE b;
+```
diff --git a/docs/en/sql-reference/statements/create/index.md b/docs/en/sql-reference/statements/create/index.md
index 14e29d051d7..fa39526a53e 100644
--- a/docs/en/sql-reference/statements/create/index.md
+++ b/docs/en/sql-reference/statements/create/index.md
@@ -8,13 +8,14 @@ sidebar_label: CREATE
Create queries make a new entity of one of the following kinds:
-- [DATABASE](../../../sql-reference/statements/create/database.md)
-- [TABLE](../../../sql-reference/statements/create/table.md)
-- [VIEW](../../../sql-reference/statements/create/view.md)
-- [DICTIONARY](../../../sql-reference/statements/create/dictionary.md)
-- [FUNCTION](../../../sql-reference/statements/create/function.md)
-- [USER](../../../sql-reference/statements/create/user.md)
-- [ROLE](../../../sql-reference/statements/create/role.md)
-- [ROW POLICY](../../../sql-reference/statements/create/row-policy.md)
-- [QUOTA](../../../sql-reference/statements/create/quota.md)
-- [SETTINGS PROFILE](../../../sql-reference/statements/create/settings-profile.md)
+- [DATABASE](/docs/en/sql-reference/statements/create/database.md)
+- [TABLE](/docs/en/sql-reference/statements/create/table.md)
+- [VIEW](/docs/en/sql-reference/statements/create/view.md)
+- [DICTIONARY](/docs/en/sql-reference/statements/create/dictionary.md)
+- [FUNCTION](/docs/en/sql-reference/statements/create/function.md)
+- [USER](/docs/en/sql-reference/statements/create/user.md)
+- [ROLE](/docs/en/sql-reference/statements/create/role.md)
+- [ROW POLICY](/docs/en/sql-reference/statements/create/row-policy.md)
+- [QUOTA](/docs/en/sql-reference/statements/create/quota.md)
+- [SETTINGS PROFILE](/docs/en/sql-reference/statements/create/settings-profile.md)
+- [NAMED COLLECTION](/docs/en/sql-reference/statements/create/named-collection.md)
diff --git a/docs/en/sql-reference/statements/create/named-collection.md b/docs/en/sql-reference/statements/create/named-collection.md
new file mode 100644
index 00000000000..1fc7b11c554
--- /dev/null
+++ b/docs/en/sql-reference/statements/create/named-collection.md
@@ -0,0 +1,34 @@
+---
+slug: /en/sql-reference/statements/create/named-collection
+sidebar_label: NAMED COLLECTION
+---
+
+# CREATE NAMED COLLECTION
+
+Creates a new named collection.
+
+**Syntax**
+
+```sql
+CREATE NAMED COLLECTION [IF NOT EXISTS] name [ON CLUSTER cluster] AS
+key_name1 = 'some value',
+key_name2 = 'some value',
+key_name3 = 'some value',
+...
+```
+
+**Example**
+
+```sql
+CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
+```
+
+**Related statements**
+
+- [ALTER NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/alter/named-collection)
+- [DROP NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/drop#drop-function)
+
+
+**See Also**
+
+- [Named collections guide](/docs/en/operations/named-collections.md)
diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md
index c69285171ab..a6ced870c18 100644
--- a/docs/en/sql-reference/statements/create/quota.md
+++ b/docs/en/sql-reference/statements/create/quota.md
@@ -11,6 +11,7 @@ Syntax:
``` sql
CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
+ [IN access_storage_type]
[KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
{MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
diff --git a/docs/en/sql-reference/statements/create/role.md b/docs/en/sql-reference/statements/create/role.md
index 9b14e220e1f..4b6fffe4f60 100644
--- a/docs/en/sql-reference/statements/create/role.md
+++ b/docs/en/sql-reference/statements/create/role.md
@@ -11,6 +11,7 @@ Syntax:
``` sql
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...]
+ [IN access_storage_type]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```
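+
+For illustration, a sketch using the new clause (the role name and the `local_directory` storage are assumptions for the example):
+
+```sql
+CREATE ROLE accountant IN local_directory;
+```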
diff --git a/docs/en/sql-reference/statements/create/row-policy.md b/docs/en/sql-reference/statements/create/row-policy.md
index 83bb2e6bb9a..cd7718793bd 100644
--- a/docs/en/sql-reference/statements/create/row-policy.md
+++ b/docs/en/sql-reference/statements/create/row-policy.md
@@ -16,6 +16,7 @@ Syntax:
``` sql
CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1|db1.*
[, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2|db2.* ...]
+ [IN access_storage_type]
[FOR SELECT] USING condition
[AS {PERMISSIVE | RESTRICTIVE}]
[TO {role1 [, role2 ...] | ALL | ALL EXCEPT role1 [, role2 ...]}]
diff --git a/docs/en/sql-reference/statements/create/settings-profile.md b/docs/en/sql-reference/statements/create/settings-profile.md
index 8e221a4d82f..d8afce9d6de 100644
--- a/docs/en/sql-reference/statements/create/settings-profile.md
+++ b/docs/en/sql-reference/statements/create/settings-profile.md
@@ -12,6 +12,7 @@ Syntax:
``` sql
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
+ [IN access_storage_type]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```
diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md
index d168be63c36..11d4eae7bc8 100644
--- a/docs/en/sql-reference/statements/create/user.md
+++ b/docs/en/sql-reference/statements/create/user.md
@@ -14,6 +14,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}]
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
+ [IN access_storage_type]
[DEFAULT ROLE role [,...]]
[DEFAULT DATABASE database | NONE]
[GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]]
diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 10b15638152..11026340a0f 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -97,7 +97,7 @@ This is an experimental feature that may change in backwards-incompatible ways i
:::
```sql
-CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
+CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
```
Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query.
diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md
index b6208c2fd52..eed76dbcd5c 100644
--- a/docs/en/sql-reference/statements/drop.md
+++ b/docs/en/sql-reference/statements/drop.md
@@ -49,7 +49,7 @@ Deletes a user.
Syntax:
``` sql
-DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name]
+DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type]
```
## DROP ROLE
@@ -59,7 +59,7 @@ Deletes a role. The deleted role is revoked from all the entities where it was a
Syntax:
``` sql
-DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name]
+DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type]
```
## DROP ROW POLICY
@@ -69,7 +69,7 @@ Deletes a row policy. Deleted row policy is revoked from all the entities where
Syntax:
``` sql
-DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...] [ON CLUSTER cluster_name]
+DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...] [ON CLUSTER cluster_name] [FROM access_storage_type]
```
## DROP QUOTA
@@ -79,7 +79,7 @@ Deletes a quota. The deleted quota is revoked from all the entities where it was
Syntax:
``` sql
-DROP QUOTA [IF EXISTS] name [,...] [ON CLUSTER cluster_name]
+DROP QUOTA [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type]
```
## DROP SETTINGS PROFILE
@@ -89,7 +89,7 @@ Deletes a settings profile. The deleted settings profile is revoked from all the
Syntax:
``` sql
-DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name]
+DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type]
```
## DROP VIEW
@@ -119,3 +119,20 @@ DROP FUNCTION [IF EXISTS] function_name [on CLUSTER cluster]
CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;
DROP FUNCTION linear_equation;
```
+
+## DROP NAMED COLLECTION
+
+Deletes a named collection.
+
+**Syntax**
+
+``` sql
+DROP NAMED COLLECTION [IF EXISTS] name [on CLUSTER cluster]
+```
+
+**Example**
+
+``` sql
+CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
+DROP NAMED COLLECTION foobar;
+```
diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md
index d6e30827f9b..e0cc98c2351 100644
--- a/docs/en/sql-reference/statements/insert-into.md
+++ b/docs/en/sql-reference/statements/insert-into.md
@@ -11,7 +11,7 @@ Inserts data into a table.
**Syntax**
``` sql
-INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
+INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
@@ -107,7 +107,7 @@ If table has [constraints](../../sql-reference/statements/create/table.md#constr
**Syntax**
``` sql
-INSERT INTO [db.]table [(c1, c2, c3)] SELECT ...
+INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ...
```
Columns are mapped according to their position in the SELECT clause. However, their names in the SELECT expression and the table for INSERT may differ. If necessary, type casting is performed.
@@ -126,7 +126,7 @@ To insert a default value instead of `NULL` into a column with not nullable data
**Syntax**
``` sql
-INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name
+INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name
```
Use the syntax above to insert data from a file, or files, stored on the **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause.
diff --git a/docs/en/sql-reference/statements/move.md b/docs/en/sql-reference/statements/move.md
new file mode 100644
index 00000000000..fac738ff711
--- /dev/null
+++ b/docs/en/sql-reference/statements/move.md
@@ -0,0 +1,32 @@
+---
+slug: /en/sql-reference/statements/move
+sidebar_position: 54
+sidebar_label: MOVE
+---
+
+# MOVE access entity statement
+
+This statement allows moving an access entity from one access storage to another.
+
+Syntax:
+
+```sql
+MOVE {USER, ROLE, QUOTA, SETTINGS PROFILE, ROW POLICY} name1 [, name2, ...] TO access_storage_type
+```
+
+Currently, there are five access storages in ClickHouse:
+ - `local_directory`
+ - `memory`
+ - `replicated`
+ - `users_xml` (read-only)
+ - `ldap` (read-only)
+
+Examples:
+
+```sql
+MOVE USER test TO local_directory
+```
+
+```sql
+MOVE ROLE test TO memory
+```
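+
+To check which storage currently holds an entity after a move, the `storage` column of the corresponding system table can be queried, e.g. `system.users` for users (an illustrative sketch; `test` is the user from the examples above):
+
+```sql
+SELECT name, storage FROM system.users WHERE name = 'test';
+```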
diff --git a/docs/en/sql-reference/statements/select/with.md b/docs/en/sql-reference/statements/select/with.md
index 4654f249548..a59ef463419 100644
--- a/docs/en/sql-reference/statements/select/with.md
+++ b/docs/en/sql-reference/statements/select/with.md
@@ -5,7 +5,27 @@ sidebar_label: WITH
# WITH Clause
-ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)), that is provides to use results of `WITH` clause in the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression.
+ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)) and substitutes the code defined in the `WITH` clause at every place it is used in the rest of the `SELECT` query. Named subqueries can be included in the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current-level CTEs from the WITH expression.
+
+Please note that CTEs do not guarantee the same results in all places they are referenced, because the query is re-executed for each use.
+
+An example of this behavior is shown below:
+``` sql
+WITH cte_numbers AS
+(
+    SELECT
+        num
+    FROM generateRandom('num UInt64', NULL)
+    LIMIT 1000000
+)
+SELECT
+    count()
+FROM cte_numbers
+WHERE num IN (SELECT num FROM cte_numbers)
+```
+If CTEs passed along the exact results rather than just a piece of code, you would always see `1000000`.
+
+However, because `cte_numbers` is referenced twice, random numbers are generated on each use and, accordingly, we see different results, such as `280501, 392454, 261636, 196227` and so on...
## Syntax
diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index 336b93db9d5..1c399d2072b 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -205,7 +205,7 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ
The optional keyword `FULL` causes the output to include the collation, comment and privilege columns.
-`SHOW COLUMNS` produces a result table with the following structure:
+The statement produces a result table with the following structure:
- field - The name of the column (String)
- type - The column data type (String)
- null - If the column data type is Nullable (UInt8)
@@ -272,6 +272,10 @@ SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2
Displays a list of primary and data skipping indexes of a table.
+This statement mostly exists for compatibility with MySQL. System tables [system.tables](../../operations/system-tables/tables.md) (for
+primary keys) and [system.data_skipping_indices](../../operations/system-tables/data_skipping_indices.md) (for data skipping indices)
+provide equivalent information but in a fashion more native to ClickHouse.
+
```sql
SHOW [EXTENDED] {INDEX | INDEXES | INDICES | KEYS } {FROM | IN}
[{FROM | IN} ] [WHERE ] [INTO OUTFILE ] [FORMAT ]
```
@@ -281,22 +285,22 @@ equivalent. If no database is specified, the query assumes the current database
The optional keyword `EXTENDED` currently has no effect, it only exists for MySQL compatibility.
-`SHOW INDEX` produces a result table with the following structure:
-- table - The name of the table (String)
-- non_unique - 0 if the index cannot contain duplicates, 1 otherwise (UInt8)
-- key_name - The name of the index, `PRIMARY` if the index is a primary key index (String)
-- seq_in_index - Currently unused
-- column_name - Currently unused
-- collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String))
-- cardinality - Currently unused
-- sub_part - Currently unused
-- packed - Currently unused
+The statement produces a result table with the following structure:
+- table - The name of the table. (String)
+- non_unique - Always `1` as ClickHouse does not support uniqueness constraints. (UInt8)
+- key_name - The name of the index, `PRIMARY` if the index is a primary key index. (String)
+- seq_in_index - For a primary key index, the position of the column starting from `1`. For a data skipping index: always `1`. (UInt8)
+- column_name - For a primary key index, the name of the column. For a data skipping index: `''` (empty string), see field "expression". (String)
+- collation - The sorting of the column in the index: `A` if ascending, `D` if descending, `NULL` if unsorted. (Nullable(String))
+- cardinality - An estimation of the index cardinality (number of unique values in the index). Currently always 0. (UInt64)
+- sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL. (Nullable(String))
+- packed - Always `NULL` because ClickHouse does not support packed indexes (like MySQL). (Nullable(String))
- null - Currently unused
-- index_type - The index type, e.g. `primary`, `minmax`, `bloom_filter` etc. (String)
-- comment - Currently unused
-- index_comment - Currently unused
-- visible - If the index is visible to the optimizer, always `YES` (String)
-- expression - The index expression (String)
+- index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String)
+- comment - Additional information about the index, currently always `''` (empty string). (String)
+- index_comment - `''` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field (like in MySQL). (String)
+- visible - If the index is visible to the optimizer, always `YES`. (String)
+- expression - For a data skipping index, the index expression. For a primary key index: `''` (empty string). (String)
**Examples**
@@ -310,11 +314,12 @@ Result:
``` text
┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐
-│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ bloom_filter │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ d, b │
-│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ a, c, d │
-│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, d, e │
-│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ primary │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, a │
-│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ set │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ e │
+│ tbl │ 1 │ blf_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │
+│ tbl │ 1 │ mm1_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │
+│ tbl │ 1 │ mm2_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │
+│ tbl │ 1 │ PRIMARY │ 1 │ c │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │
+│ tbl │ 1 │ PRIMARY │ 2 │ a │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │
+│ tbl │ 1 │ set_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │
└───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘
```
diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md
index 65a35f03fbe..59970dbeccd 100644
--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@@ -314,6 +314,22 @@ Provides possibility to start background fetch tasks from replication queues whi
SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
+### STOP PULLING REPLICATION LOG
+
+Stops loading new entries from the replication log into the replication queue of a `ReplicatedMergeTree` table.
+
+``` sql
+SYSTEM STOP PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
+```
+
+### START PULLING REPLICATION LOG
+
+Cancels `SYSTEM STOP PULLING REPLICATION LOG`.
+
+``` sql
+SYSTEM START PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
+```
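+
+As with other replication controls, both commands can be scoped to a single table (a minimal illustration; `db.replicated_table` is a hypothetical table name):
+
+``` sql
+SYSTEM STOP PULLING REPLICATION LOG db.replicated_table
+SYSTEM START PULLING REPLICATION LOG db.replicated_table
+```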
+
### SYNC REPLICA
Wait until a `ReplicatedMergeTree` table will be synced with other replicas in a cluster, but no more than `receive_timeout` seconds.
@@ -414,3 +430,29 @@ Will do sync syscall.
```sql
SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name]
```
+
+
+### SYSTEM STOP LISTEN
+
+Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol.
+
+However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect.
+
+```sql
+SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
+```
+
+- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped.
+- If `QUERIES ALL` modifier is specified, all protocols are stopped.
+- If `QUERIES DEFAULT` modifier is specified, all default protocols are stopped.
+- If `QUERIES CUSTOM` modifier is specified, all custom protocols are stopped.
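+
+For example, to temporarily stop accepting new MySQL-protocol connections (a minimal illustration):
+
+```sql
+SYSTEM STOP LISTEN MYSQL
+```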
+
+### SYSTEM START LISTEN
+
+Allows new connections to be established on the specified protocols.
+
+However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect.
+
+```sql
+SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
+```
diff --git a/docs/en/sql-reference/statements/truncate.md b/docs/en/sql-reference/statements/truncate.md
index 457031a2157..4b46210aa09 100644
--- a/docs/en/sql-reference/statements/truncate.md
+++ b/docs/en/sql-reference/statements/truncate.md
@@ -4,8 +4,9 @@ sidebar_position: 52
sidebar_label: TRUNCATE
---
-# TRUNCATE Statement
+# TRUNCATE Statements
+## TRUNCATE TABLE
``` sql
TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
```
@@ -21,3 +22,10 @@ You can specify how long (in seconds) to wait for inactive replicas to execute `
:::note
If the `alter_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown.
:::
+
+## TRUNCATE DATABASE
+``` sql
+TRUNCATE DATABASE [IF EXISTS] db [ON CLUSTER cluster]
+```
+
+Removes all tables from a database but keeps the database itself. When the clause `IF EXISTS` is omitted, the query returns an error if the database does not exist.
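+
+For example (a minimal illustration; `mydb` is a hypothetical database name):
+
+``` sql
+TRUNCATE DATABASE IF EXISTS mydb;
+```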
diff --git a/docs/en/sql-reference/table-functions/azureBlobStorage.md b/docs/en/sql-reference/table-functions/azureBlobStorage.md
index 7bb5d892c47..59c92e1327e 100644
--- a/docs/en/sql-reference/table-functions/azureBlobStorage.md
+++ b/docs/en/sql-reference/table-functions/azureBlobStorage.md
@@ -19,7 +19,7 @@ azureBlobStorage(- connection_string|storage_account_url, container_name, blobpa
- `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key)
- `container_name` - Container name
-- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
+- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
- `account_name` - if storage_account_url is used, then account name can be specified here
- `account_key` - if storage_account_url is used, then account key can be specified here
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
diff --git a/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md b/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md
new file mode 100644
index 00000000000..20dfd35d5db
--- /dev/null
+++ b/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md
@@ -0,0 +1,47 @@
+---
+slug: /en/sql-reference/table-functions/azureBlobStorageCluster
+sidebar_position: 55
+sidebar_label: azureBlobStorageCluster
+title: "azureBlobStorageCluster Table Function"
+---
+
+Allows processing files from [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs) in parallel from many nodes in a specified cluster. On the initiator it creates a connection to all nodes in the cluster, expands the asterisks in the blob path, and dispatches each file dynamically. On a worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
+This table function is similar to the [s3Cluster function](../../sql-reference/table-functions/s3Cluster.md).
+
+**Syntax**
+
+``` sql
+azureBlobStorageCluster(cluster_name, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
+```
+
+**Arguments**
+
+- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
+- `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key)
+- `container_name` - Container name
+- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
+- `account_name` - if storage_account_url is used, then account name can be specified here
+- `account_key` - if storage_account_url is used, then account key can be specified here
+- `format` — The [format](../../interfaces/formats.md#formats) of the file.
+- `compression` — Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. (same as setting to `auto`).
+- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
+
+**Returned value**
+
+A table with the specified structure for reading or writing data in the specified file.
+
+**Examples**
+
+Select the count for the file `test_cluster_*.csv`, using all the nodes in the `cluster_simple` cluster:
+
+``` sql
+SELECT count(*) from azureBlobStorageCluster(
+ 'cluster_simple', 'http://azurite1:10000/devstoreaccount1', 'test_container', 'test_cluster_count.csv', 'devstoreaccount1',
+ 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV',
+ 'auto', 'key UInt64')
+```
+
+**See Also**
+
+- [AzureBlobStorage engine](../../engines/table-engines/integrations/azureBlobStorage.md)
+- [azureBlobStorage table function](../../sql-reference/table-functions/azureBlobStorage.md)
diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md
index 7362c433e0e..a083c6b89a6 100644
--- a/docs/en/sql-reference/table-functions/cluster.md
+++ b/docs/en/sql-reference/table-functions/cluster.md
@@ -16,14 +16,14 @@ All available clusters are listed in the [system.clusters](../../operations/syst
**Syntax**
``` sql
-cluster('cluster_name', db.table[, sharding_key])
-cluster('cluster_name', db, table[, sharding_key])
-clusterAllReplicas('cluster_name', db.table[, sharding_key])
-clusterAllReplicas('cluster_name', db, table[, sharding_key])
+cluster(['cluster_name', db.table, sharding_key])
+cluster(['cluster_name', db, table, sharding_key])
+clusterAllReplicas(['cluster_name', db.table, sharding_key])
+clusterAllReplicas(['cluster_name', db, table, sharding_key])
```
**Arguments**
-- `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
+- `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers; `default` is used if not specified.
- `db.table` or `db`, `table` - Name of a database and a table.
- `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard.
diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md
index c78ffc1d61c..4db9494502e 100644
--- a/docs/en/sql-reference/table-functions/file.md
+++ b/docs/en/sql-reference/table-functions/file.md
@@ -13,16 +13,18 @@ The `file` function can be used in `SELECT` and `INSERT` queries to read from or
**Syntax**
``` sql
-file(path [,format] [,structure] [,compression])
+file([path_to_archive ::] path [,format] [,structure] [,compression])
```
**Parameters**
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
+- `path_to_archive` - The relative path to a zip/tar/7z archive. The archive path supports the same globs as `path`.
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
+
**Returned value**
A table with the specified structure for reading or writing data in the specified file.
@@ -128,13 +130,18 @@ file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32');
└─────────┴─────────┴─────────┘
```
+Getting data from the table in table.csv, located in archive1.zip and/or archive2.zip:
+``` sql
+SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv');
+```
+
## Globs in Path
Multiple path components can have globs. For being processed file must exist and match to the whole path pattern (not only suffix or prefix).
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
-- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
+- `{some_string,another_string,yet_another_one}` — Substitutes any of the strings `'some_string', 'another_string', 'yet_another_one'`. The strings can contain the `/` symbol.
- `{N..M}` — Substitutes any number in range from N to M including both borders.
- `**` - Fetches all files inside the folder recursively.
diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md
index 01b4e4f6a69..48c2381696e 100644
--- a/docs/en/sql-reference/table-functions/gcs.md
+++ b/docs/en/sql-reference/table-functions/gcs.md
@@ -22,7 +22,7 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML
**Arguments**
-- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
+- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
:::note GCS
The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API:
diff --git a/docs/en/sql-reference/table-functions/hdfsCluster.md b/docs/en/sql-reference/table-functions/hdfsCluster.md
index 832be46d05f..75100eeb4f3 100644
--- a/docs/en/sql-reference/table-functions/hdfsCluster.md
+++ b/docs/en/sql-reference/table-functions/hdfsCluster.md
@@ -17,7 +17,7 @@ hdfsCluster(cluster_name, URI, format, structure)
**Arguments**
- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
-- `URI` — URI to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
+- `URI` — URI to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
diff --git a/docs/en/sql-reference/table-functions/iceberg.md b/docs/en/sql-reference/table-functions/iceberg.md
index 30db0ef00aa..fa86b436a5e 100644
--- a/docs/en/sql-reference/table-functions/iceberg.md
+++ b/docs/en/sql-reference/table-functions/iceberg.md
@@ -21,7 +21,7 @@ iceberg(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure])
- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. By default `Parquet` is used.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
-Engine parameters can be specified using [Named Collections](../../operations/named-collections.md)
+Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md).
**Returned value**
diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md
index fba3ea55653..59ed4bf1985 100644
--- a/docs/en/sql-reference/table-functions/remote.md
+++ b/docs/en/sql-reference/table-functions/remote.md
@@ -13,10 +13,10 @@ Both functions can be used in `SELECT` and `INSERT` queries.
## Syntax
``` sql
-remote('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
-remote('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
-remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
-remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
+remote('addresses_expr', [db, table, 'user'[, 'password'], sharding_key])
+remote('addresses_expr', [db.table, 'user'[, 'password'], sharding_key])
+remoteSecure('addresses_expr', [db, table, 'user'[, 'password'], sharding_key])
+remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key])
```
## Parameters
@@ -29,6 +29,8 @@ remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
The port is required for an IPv6 address.
+    If only this parameter is specified, `db` and `table` will use `system.one` by default.
+
Type: [String](../../sql-reference/data-types/string.md).
- `db` — Database name. Type: [String](../../sql-reference/data-types/string.md).
diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md
index 55c825b8b9b..07addafcf58 100644
--- a/docs/en/sql-reference/table-functions/s3.md
+++ b/docs/en/sql-reference/table-functions/s3.md
@@ -23,7 +23,7 @@ For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_
**Arguments**
-- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
+- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
:::note GCS
The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API:
diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md
index d5bdc85f9f8..675aef54d34 100644
--- a/docs/en/sql-reference/table-functions/s3Cluster.md
+++ b/docs/en/sql-reference/table-functions/s3Cluster.md
@@ -16,7 +16,7 @@ s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [,
**Arguments**
- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
-- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
+- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional.
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md
index 2ab43f1b895..859de86f019 100644
--- a/docs/en/sql-reference/table-functions/url.md
+++ b/docs/en/sql-reference/table-functions/url.md
@@ -56,6 +56,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
+- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows to enable/disable decoding/encoding of the path in the URI. Enabled by default.
**See Also**
diff --git a/docs/en/sql-reference/transactions.md b/docs/en/sql-reference/transactions.md
index 68fbfe0b22a..cb89a091d68 100644
--- a/docs/en/sql-reference/transactions.md
+++ b/docs/en/sql-reference/transactions.md
@@ -3,23 +3,46 @@ slug: /en/guides/developer/transactional
---
# Transactional (ACID) support
-INSERT into one partition* in one table* of MergeTree* family up to max_insert_block_size rows* is transactional (ACID):
-- Atomic: INSERT is succeeded or rejected as a whole: if confirmation is sent to the client, all rows INSERTed; if error is sent to the client, no rows INSERTed.
+## Case 1: INSERT into one partition, of one table, of the MergeTree* family
+
+This is transactional (ACID) if the inserted rows are packed and inserted as a single block (see Notes):
+- Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted.
- Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted.
-- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as if before INSERT or after successful INSERT; no partial state is seen;
-- Durable: successful INSERT is written to the filesystem before answering to the client, on single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
-* If table has many partitions and INSERT covers many partitions–then insertion into every partition is transactional on its own;
-* INSERT into multiple tables with one statement is possible if materialized views are involved;
-* INSERT into Distributed table is not transactional as a whole, while insertion into every shard is transactional;
-* another example: insert into Buffer tables is neither atomic nor isolated or consistent or durable;
-* atomicity is ensured even if `async_insert` is enabled, but it can be turned off by the wait_for_async_insert setting;
-* max_insert_block_size is 1 000 000 by default and can be adjusted as needed;
-* if client did not receive the answer from the server, the client does not know if transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties;
-* ClickHouse is using MVCC with snapshot isolation internally;
-* all ACID properties are valid even in case of server kill / crash;
-* either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in typical setup;
-* "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
-* this explanation does not cover a new transactions feature that allow to have full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc.
+- Isolated: concurrent clients observe a consistent snapshot of the table: the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen
+- Durable: a successful INSERT is written to the filesystem before answering to the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
+- INSERT into multiple tables with one statement is possible if materialized views are involved (the INSERT from the client is into a table which has associated materialized views).
+
+## Case 2: INSERT into multiple partitions, of one table, of the MergeTree* family
+
+Same as Case 1 above, with this detail:
+- If a table has many partitions and an INSERT covers many partitions, then insertion into every partition is transactional on its own
+
+
+## Case 3: INSERT into one distributed table of the MergeTree* family
+
+Same as Case 1 above, with this detail:
+- INSERT into a Distributed table is not transactional as a whole, while insertion into every shard is transactional
+
+## Case 4: Using a Buffer table
+
+- INSERT into Buffer tables is neither atomic, nor isolated, nor consistent, nor durable
+
+## Case 5: Using async_insert
+
+Same as Case 1 above, with this detail:
+- atomicity is ensured even if `async_insert` is enabled and `wait_for_async_insert` is set to 1 (the default), but if `wait_for_async_insert` is set to 0, then atomicity is not ensured.
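+
+For example, the following insert keeps the atomicity guarantee while using asynchronous inserts (a minimal sketch; the table name `t` is hypothetical):
+
+```sql
+INSERT INTO t SETTINGS async_insert = 1, wait_for_async_insert = 1 VALUES (1), (2), (3);
+```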
+
+## Notes
+- rows inserted from the client in some data format are packed into a single block when:
+  - the insert format is row-based (like CSV, TSV, Values, JSONEachRow, etc.) and the data contains fewer than `max_insert_block_size` rows (~1 000 000 by default) or less than `min_chunk_bytes_for_parallel_parsing` bytes (10 MB by default) when parallel parsing is used (enabled by default)
+ - the insert format is column-based (like Native, Parquet, ORC, etc) and the data contains only one block of data
+- the size of the inserted block in general may depend on many settings (for example: `max_block_size`, `max_insert_block_size`, `min_insert_block_size_rows`, `min_insert_block_size_bytes`, `preferred_block_size_bytes`, etc)
+- if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties
+- ClickHouse is using MVCC with snapshot isolation internally
+- all ACID properties are valid even in the case of server kill/crash
+- either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in the typical setup
+- "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
+- this explanation does not cover the new transactions feature that allows full-featured transactions over multiple tables, materialized views, multiple SELECTs, etc. (see the next section on Transactions, Commit, and Rollback)
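+
+The thresholds mentioned in the notes above can be inspected at runtime (a minimal illustration):
+
+```sql
+SELECT name, value
+FROM system.settings
+WHERE name IN ('max_insert_block_size', 'min_insert_block_size_rows', 'min_chunk_bytes_for_parallel_parsing');
+```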
## Transactions, Commit, and Rollback
diff --git a/docs/ru/development/build-osx.md b/docs/ru/development/build-osx.md
index 9a1f9c9347d..6b4e612b13f 100644
--- a/docs/ru/development/build-osx.md
+++ b/docs/ru/development/build-osx.md
@@ -68,7 +68,7 @@ $ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/
$ rm -rf build
$ mkdir build
$ cd build
- $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER==$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
+ $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake --build . --config RelWithDebInfo
$ cd ..
diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md
index 574d9273088..a4e58e66e0c 100644
--- a/docs/ru/engines/table-engines/special/buffer.md
+++ b/docs/ru/engines/table-engines/special/buffer.md
@@ -9,7 +9,7 @@ sidebar_label: Buffer
Буферизует записываемые данные в оперативке, периодически сбрасывая их в другую таблицу. При чтении, производится чтение данных одновременно из буфера и из другой таблицы.
``` sql
-Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
+Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]])
```
Параметры движка:
diff --git a/docs/ru/getting-started/example-datasets/wikistat.md b/docs/ru/getting-started/example-datasets/wikistat.md
deleted file mode 100644
index 479616d667b..00000000000
--- a/docs/ru/getting-started/example-datasets/wikistat.md
+++ /dev/null
@@ -1,32 +0,0 @@
----
-slug: /ru/getting-started/example-datasets/wikistat
-sidebar_position: 17
-sidebar_label: WikiStat
----
-
-# WikiStat {#wikistat}
-
-См: http://dumps.wikimedia.org/other/pagecounts-raw/
-
-Создание таблицы:
-
-``` sql
-CREATE TABLE wikistat
-(
- date Date,
- time DateTime,
- project String,
- subproject String,
- path String,
- hits UInt64,
- size UInt64
-) ENGINE = MergeTree(date, (path, time), 8192);
-```
-
-Загрузка данных:
-
-``` bash
-$ for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txt
-$ cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; done
-$ ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done
-```
diff --git a/docs/ru/getting-started/example-datasets/wikistat.md b/docs/ru/getting-started/example-datasets/wikistat.md
new file mode 120000
index 00000000000..2d429d00984
--- /dev/null
+++ b/docs/ru/getting-started/example-datasets/wikistat.md
@@ -0,0 +1 @@
+../../../en/getting-started/example-datasets/wikistat.md
\ No newline at end of file
diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 48a6132170a..5571936f4c5 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -401,8 +401,8 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
- [output_format_csv_crlf_end_of_line](../operations/settings/settings.md#output_format_csv_crlf_end_of_line) - если установлено значение true, конец строки в формате вывода CSV будет `\r\n` вместо `\n`. Значение по умолчанию - `false`.
- [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - пропустить указанное количество строк в начале данных. Значение по умолчанию - `0`.
- [input_format_csv_detect_header](../operations/settings/settings.md#input_format_csv_detect_header) - обнаружить заголовок с именами и типами в формате CSV. Значение по умолчанию - `true`.
-- [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек.
-Значение по умолчанию - `true`.
+- [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек. Значение по умолчанию - `true`.
+- [input_format_csv_allow_variable_number_of_columns](../operations/settings/settings.md/#input_format_csv_allow_variable_number_of_columns) - игнорировать дополнительные столбцы (если файл содержит больше столбцов, чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`.
## CSVWithNames {#csvwithnames}
@@ -1353,8 +1353,6 @@ ClickHouse поддерживает настраиваемую точность
$ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet"
```
-Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested).
-
Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Parquet, используйте команду следующего вида:
``` bash
@@ -1413,8 +1411,6 @@ ClickHouse поддерживает настраиваемую точность
$ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow"
```
-Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested).
-
### Вывод данных {#selecting-data-arrow}
Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Arrow, используйте команду следующего вида:
@@ -1471,8 +1467,6 @@ ClickHouse поддерживает настраиваемую точность
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
```
-Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested).
-
### Вывод данных {#selecting-data-2}
Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата ORC, используйте команду следующего вида:
diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md
index b8c5ee77f0c..981f1c7b5a2 100644
--- a/docs/ru/interfaces/http.md
+++ b/docs/ru/interfaces/http.md
@@ -50,7 +50,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
-X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
1
```
@@ -266,9 +266,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
Прогресс выполнения запроса можно отслеживать с помощью заголовков ответа `X-ClickHouse-Progress`. Для этого включите [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Пример последовательности заголовков:
``` text
-X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
+X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
+X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
+X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
```
Возможные поля заголовка:
@@ -529,7 +529,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@@ -569,7 +569,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
%
@@ -621,7 +621,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Absolute Path File
* Connection #0 to host localhost left intact
@@ -640,7 +640,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Relative Path File
* Connection #0 to host localhost left intact
diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md
index 2b824ce91bd..085761d80c7 100644
--- a/docs/ru/operations/configuration-files.md
+++ b/docs/ru/operations/configuration-files.md
@@ -85,6 +85,43 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
Сервер следит за изменениями конфигурационных файлов, а также файлов и ZooKeeper-узлов, которые были использованы при выполнении подстановок и переопределений, и перезагружает настройки пользователей и кластеров на лету. То есть, можно изменять кластера, пользователей и их настройки без перезапуска сервера.
+## Шифрование {#encryption}
+
+Вы можете использовать симметричное шифрование для шифрования элемента конфигурации, например, поля password. Чтобы это сделать, сначала настройте [кодек шифрования](../sql-reference/statements/create/table.md#encryption-codecs), затем добавьте атрибут `encrypted_by` с именем кодека шифрования как значение к элементу, который надо зашифровать.
+
+В отличие от атрибутов `from_zk`, `from_env` и `incl` (или элемента `include`), подстановка, т.е. расшифровка зашифрованного значения, не выполняется в файле предобработки. Расшифровка происходит только во время исполнения в серверном процессе.
+
+Пример:
+
+```xml
+<clickhouse>
+    <encryption_codecs>
+        <aes_128_gcm_siv>
+            <key_hex>00112233445566778899aabbccddeeff</key_hex>
+        </aes_128_gcm_siv>
+    </encryption_codecs>
+
+    <interserver_http_credentials>
+        <user>admin</user>
+        <password encrypted_by="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
+    </interserver_http_credentials>
+</clickhouse>
+```
+
+Чтобы получить зашифрованное значение, можно использовать приложение-пример `encrypt_decrypt`.
+
+Пример:
+
+``` bash
+./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd
+```
+
+``` text
+961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
+```
+
## Примеры записи конфигурации на YAML {#example}
Здесь можно рассмотреть пример реальной конфигурации записанной на YAML: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example).
diff --git a/docs/ru/operations/named-collections.md b/docs/ru/operations/named-collections.md
index ba6b47116ad..48ee7c9f15d 100644
--- a/docs/ru/operations/named-collections.md
+++ b/docs/ru/operations/named-collections.md
@@ -88,7 +88,6 @@ SELECT * FROM s3_engine_table LIMIT 3;
3306test8
- 11
diff --git a/docs/ru/operations/optimizing-performance/profile-guided-optimization.md b/docs/ru/operations/optimizing-performance/profile-guided-optimization.md
new file mode 120000
index 00000000000..31cb656bd99
--- /dev/null
+++ b/docs/ru/operations/optimizing-performance/profile-guided-optimization.md
@@ -0,0 +1 @@
+../../../en/operations/optimizing-performance/profile-guided-optimization.md
\ No newline at end of file
diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md
index 5430469ea18..7b026244624 100644
--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@@ -575,14 +575,60 @@ ClickHouse поддерживает динамическое изменение
- `errorlog` - Файл лога ошибок.
- `size` - Размер файла. Действует для `log` и `errorlog`. Как только файл достиг размера `size`, ClickHouse архивирует и переименовывает его, а на его месте создает новый файл лога.
- `count` - Количество заархивированных файлов логов, которые сохраняет ClickHouse.
+- `stream_compress` – Сжимать `log` и `errorlog` с помощью алгоритма `lz4`. Чтобы активировать, установите значение `1` или `true`.
+
+Имена файлов `log` и `errorlog` (только имя файла, а не директорий) поддерживают спецификаторы шаблонов даты и времени.
+
+**Спецификаторы форматирования**
+С помощью следующих спецификаторов можно определить шаблон для формирования имени файла. Столбец “Пример” показывает возможные значения на момент времени `2023-07-06 18:32:07`.
+
+| Спецификатор | Описание | Пример |
+|--------------|---------------------------------------------------------------------------------------------------------------------|--------------------------|
+| %% | Литерал % | % |
+| %n | Символ новой строки | |
+| %t | Символ горизонтальной табуляции | |
+| %Y | Год как десятичное число, например, 2017 | 2023 |
+| %y | Последние 2 цифры года в виде десятичного числа (диапазон [00,99]) | 23 |
+| %C | Первые 2 цифры года в виде десятичного числа (диапазон [00,99]) | 20 |
+| %G | Год по неделям согласно [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), то есть год, который содержит указанную неделю. Обычно используется вместе с %V. | 2023 |
+| %g | Последние 2 цифры [года по неделям ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), т.е. года, содержащего указанную неделю (диапазон [00,99]). | 23 |
+| %b | Сокращённое название месяца, например Oct (зависит от локали) | Jul |
+| %h | Синоним %b | Jul |
+| %B | Полное название месяца, например, October (зависит от локали) | July |
+| %m | Месяц в виде десятичного числа (диапазон [01,12]) | 07 |
+| %U | Неделя года в виде десятичного числа (воскресенье - первый день недели) (диапазон [00,53]) | 27 |
+| %W | Неделя года в виде десятичного числа (понедельник - первый день недели) (диапазон [00,53]) | 27 |
+| %V | Неделя года ISO 8601 (диапазон [01,53]) | 27 |
+| %j | День года в виде десятичного числа (диапазон [001,366]) | 187 |
+| %d | День месяца в виде десятичного числа (диапазон [01,31]) Перед одиночной цифрой ставится ноль. | 06 |
+| %e | День месяца в виде десятичного числа (диапазон [1,31]). Перед одиночной цифрой ставится пробел. | 6 |
+| %a | Сокращённое название дня недели, например, Fri (зависит от локали) | Thu |
+| %A | Полное название дня недели, например, Friday (зависит от локали) | Thursday |
+| %w | День недели в виде десятичного числа, где воскресенье равно 0 (диапазон [0-6]) | 4 |
+| %u | День недели в виде десятичного числа, где понедельник равен 1 (формат ISO 8601) (диапазон [1-7]) | 4 |
+| %H | Час в виде десятичного числа, 24-часовой формат (диапазон [00-23]) | 18 |
+| %I | Час в виде десятичного числа, 12-часовой формат (диапазон [01,12]) | 06 |
+| %M | Минуты в виде десятичного числа (диапазон [00,59]) | 32 |
+| %S | Секунды как десятичное число (диапазон [00,60]) | 07 |
+| %c | Стандартная строка даты и времени, например, Sun Oct 17 04:41:13 2010 (зависит от локали) | Thu Jul 6 18:32:07 2023 |
+| %x | Локализованное представление даты (зависит от локали) | 07/06/23 |
+| %X | Локализованное представление времени, например, 18:40:20 или 6:40:20 PM (зависит от локали) | 18:32:07 |
+| %D | Эквивалентно "%m/%d/%y" | 07/06/23 |
+| %F | Эквивалентно "%Y-%m-%d" (формат даты ISO 8601) | 2023-07-06 |
+| %r | Локализованное 12-часовое время (зависит от локали) | 06:32:07 PM |
+| %R | Эквивалентно "%H:%M" | 18:32 |
+| %T | Эквивалентно "%H:%M:%S" (формат времени ISO 8601) | 18:32:07 |
+| %p | Локализованное обозначение a.m. или p.m. (зависит от локали) | PM |
+| %z | Смещение от UTC в формате ISO 8601 (например, -0430), или без символов, если информация о часовом поясе недоступна | +0800 |
+| %Z | Зависящее от локали название или аббревиатура часового пояса, если информация о часовом поясе доступна | Z AWST |
**Пример**
``` xml
trace
- /var/log/clickhouse-server/clickhouse-server.log
- /var/log/clickhouse-server/clickhouse-server.err.log
+ /var/log/clickhouse-server/clickhouse-server-%F-%T.log
+ /var/log/clickhouse-server/clickhouse-server-%F-%T.err.log1000M10
@@ -1012,6 +1058,10 @@ ClickHouse использует потоки из глобального пул
metric_log
75001000
+ 1048576
+ 8192
+ 524288
+ false
```
@@ -1056,7 +1106,7 @@ ClickHouse использует потоки из глобального пул
- verificationMode - Способ проверки сертификатов узла. Подробности находятся в описании класса [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h). Допустимые значения: `none`, `relaxed`, `strict`, `once`.
- verificationDepth - Максимальная длина верификационной цепи. Верификация завершится ошибкой, если длина цепи сертификатов превысит установленное значение.
- loadDefaultCAFile - Признак того, что будут использоваться встроенные CA-сертификаты для OpenSSL. Допустимые значения: `true`, `false`. \|
-- cipherList - Поддерживаемые OpenSSL-шифры. Например, `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`.
+- cipherList - Поддерживаемые OpenSSL-шифры. Например, `ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH`.
- cacheSessions - Включение/выключение кеширования сессии. Использовать обязательно вместе с `sessionIdContext`. Допустимые значения: `true`, `false`.
- sessionIdContext - Уникальный набор произвольных символов, которые сервер добавляет к каждому сгенерированному идентификатору. Длина строки не должна превышать `SSL_MAX_SSL_SESSION_ID_LENGTH`. Рекомендуется к использованию всегда, поскольку позволяет избежать проблем как в случае, если сервер кеширует сессию, так и если клиент затребовал кеширование. По умолчанию `${application.name}`.
- sessionCacheSize - Максимальное количество сессий, которые кэширует сервер. По умолчанию - 1024\*20. 0 - неограниченное количество сессий.
@@ -1109,12 +1159,19 @@ ClickHouse использует потоки из глобального пул
При настройке логирования используются следующие параметры:
-- `database` — имя базы данных;
-- `table` — имя таблицы;
-- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
-- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
-- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
-
+- `database` — имя базы данных;
+- `table` — имя таблицы;
+- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать, если используется `engine`.
+- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если используется `partition_by`.
+- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+- `max_size_rows` – максимальный размер буфера с логами в строках. Когда буфер полностью заполняется, логи сбрасываются на диск.
+Значение по умолчанию: 1048576.
+- `reserved_size_rows` – предварительно выделенный размер буфера с логами в строках.
+Значение по умолчанию: 8192.
+- `buffer_size_bytes_flush_threshold` – количество строк в логе, при достижении которого логи начнут сбрасываться на диск в неблокирующем режиме.
+Значение по умолчанию: `max_size / 2`.
+- `flush_on_crash` – определяет, должны ли логи быть сброшены на диск в случае аварийной остановки программы.
+Значение по умолчанию: false.
**Пример**
``` xml
@@ -1123,6 +1180,10 @@ ClickHouse использует потоки из глобального пул
part_log
toMonday(event_date)7500
+ 1048576
+ 8192
+ 524288
+ false
```
@@ -1172,11 +1233,19 @@ ClickHouse использует потоки из глобального пул
При настройке логирования используются следующие параметры:
-- `database` — имя базы данных;
-- `table` — имя таблицы, куда будет записываться лог;
-- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
-- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
-- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+- `database` — имя базы данных;
+- `table` — имя таблицы;
+- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать, если используется `engine`.
+- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если используется `partition_by`.
+- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+- `max_size_rows` – максимальный размер буфера с логами в строках. Когда буфер полностью заполняется, логи сбрасываются на диск.
+Значение по умолчанию: 1048576.
+- `reserved_size_rows` – предварительно выделенный размер буфера с логами в строках.
+Значение по умолчанию: 8192.
+- `buffer_size_bytes_flush_threshold` – количество строк в логе, при достижении которого логи начнут сбрасываться на диск в неблокирующем режиме.
+Значение по умолчанию: `max_size / 2`.
+- `flush_on_crash` – определяет, должны ли логи быть сброшены на диск в случае аварийной остановки программы.
+Значение по умолчанию: false.
Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически.
@@ -1188,6 +1257,10 @@ ClickHouse использует потоки из глобального пул
query_log
Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day7500
+ 1048576
+ 8192
+ 524288
+ false
```
@@ -1199,11 +1272,19 @@ ClickHouse использует потоки из глобального пул
При настройке логирования используются следующие параметры:
-- `database` — имя базы данных;
-- `table` — имя таблицы, куда будет записываться лог;
-- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
-- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
-- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+- `database` — имя базы данных;
+- `table` — имя таблицы;
+- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать, если используется `engine`.
+- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если используется `partition_by`.
+- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+- `max_size_rows` – максимальный размер буфера с логами в строках. Когда буфер полностью заполняется, логи сбрасываются на диск.
+Значение по умолчанию: 1048576.
+- `reserved_size_rows` – предварительно выделенный размер буфера с логами в строках.
+Значение по умолчанию: 8192.
+- `buffer_size_bytes_flush_threshold` – количество строк в логе, при достижении которого логи начнут сбрасываться на диск в неблокирующем режиме.
+Значение по умолчанию: `max_size / 2`.
+- `flush_on_crash` – определяет, должны ли логи быть сброшены на диск в случае аварийной остановки программы.
+Значение по умолчанию: false.
Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически.
@@ -1215,6 +1296,10 @@ ClickHouse использует потоки из глобального пул
query_thread_log
toMonday(event_date)7500
+ 1048576
+ 8192
+ 524288
+ false
```
@@ -1226,11 +1311,19 @@ ClickHouse использует потоки из глобального пул
При настройке логирования используются следующие параметры:
-- `database` – имя базы данных.
-- `table` – имя системной таблицы, где будут логироваться запросы.
-- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать, если задан параметр `engine`.
-- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если задан параметр `partition_by`.
-- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+- `database` — имя базы данных;
+- `table` — имя таблицы;
+- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать, если используется `engine`.
+- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если используется `partition_by`.
+- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+- `max_size_rows` – максимальный размер буфера с логами в строках. Когда буфер полностью заполняется, логи сбрасываются на диск.
+Значение по умолчанию: 1048576.
+- `reserved_size_rows` – предварительно выделенный размер буфера с логами в строках.
+Значение по умолчанию: 8192.
+- `buffer_size_bytes_flush_threshold` – количество строк в логе, при достижении которого логи начнут сбрасываться на диск в неблокирующем режиме.
+Значение по умолчанию: `max_size / 2`.
+- `flush_on_crash` – определяет, должны ли логи быть сброшены на диск в случае аварийной остановки программы.
+Значение по умолчанию: false.
Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически.
@@ -1242,6 +1335,10 @@ ClickHouse использует потоки из глобального пул
query_views_log
toYYYYMM(event_date)7500
+ 1048576
+ 8192
+ 524288
+ false
```
@@ -1251,12 +1348,20 @@ ClickHouse использует потоки из глобального пул
Параметры:
-- `level` — Максимальный уровень сообщения (по умолчанию `Trace`) которое будет сохранено в таблице.
-- `database` — имя базы данных для хранения таблицы.
-- `table` — имя таблицы, куда будут записываться текстовые сообщения.
-- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать если используется `engine`
-- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
-- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+- `level` — максимальный уровень сообщения (по умолчанию `Trace`), которое будет сохранено в таблице.
+- `database` — имя базы данных;
+- `table` — имя таблицы;
+- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать, если используется `engine`.
+- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если используется `partition_by`.
+- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+- `max_size_rows` – максимальный размер буфера с логами в строках. Когда буфер полностью заполняется, логи сбрасываются на диск.
+Значение по умолчанию: 1048576.
+- `reserved_size_rows` – предварительно выделенный размер буфера с логами в строках.
+Значение по умолчанию: 8192.
+- `buffer_size_bytes_flush_threshold` – количество строк в логе, при достижении которого логи начнут сбрасываться на диск в неблокирующем режиме.
+Значение по умолчанию: `max_size / 2`.
+- `flush_on_crash` – определяет, должны ли логи быть сброшены на диск в случае аварийной остановки программы.
+Значение по умолчанию: false.
**Пример**
```xml
@@ -1266,6 +1371,10 @@ ClickHouse использует потоки из глобального пул
system
text_log
7500
+ 1048576
+ 8192
+ 524288
+ falseEngine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day
@@ -1277,13 +1386,21 @@ ClickHouse использует потоки из глобального пул
Настройки для [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.
-Parameters:
+Параметры:
-- `database` — Database for storing a table.
-- `table` — Table name.
-- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
-- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
-- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
+- `database` — имя базы данных;
+- `table` — имя таблицы;
+- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать, если используется `engine`.
+- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если используется `partition_by`.
+- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+- `max_size_rows` – максимальный размер буфера с логами в строках. Когда буфер полностью заполняется, логи сбрасываются на диск.
+Значение по умолчанию: 1048576.
+- `reserved_size_rows` – предварительно выделенный размер буфера с логами в строках.
+Значение по умолчанию: 8192.
+- `buffer_size_bytes_flush_threshold` – количество строк в логе, при достижении которого логи начнут сбрасываться на диск в неблокирующем режиме.
+Значение по умолчанию: `max_size / 2`.
+- `flush_on_crash` – определяет, должны ли логи быть сброшены на диск в случае аварийной остановки программы.
+Значение по умолчанию: false.
По умолчанию файл настроек сервера `config.xml` содержит следующие настройки:
@@ -1293,9 +1410,84 @@ Parameters:
trace_log
toYYYYMM(event_date)7500
+ 1048576
+ 8192
+ 524288
```
+## asynchronous_insert_log {#server_configuration_parameters-asynchronous_insert_log}
+
+Настройки для системной таблицы `asynchronous_insert_log`, в которую логируются асинхронные вставки.
+
+Параметры:
+
+- `database` — имя базы данных;
+- `table` — имя таблицы;
+- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать, если используется `engine`.
+- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если используется `partition_by`.
+- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+- `max_size_rows` – максимальный размер буфера с логами в строках. Когда буфер полностью заполняется, логи сбрасываются на диск.
+Значение по умолчанию: 1048576.
+- `reserved_size_rows` – предварительно выделенный размер буфера с логами в строках.
+Значение по умолчанию: 8192.
+- `buffer_size_bytes_flush_threshold` – количество строк в логе, при достижении которого логи начнут сбрасываться на диск в неблокирующем режиме.
+Значение по умолчанию: `max_size / 2`.
+- `flush_on_crash` – определяет, должны ли логи быть сброшены на диск в случае аварийной остановки программы.
+Значение по умолчанию: false.
+
+**Пример**
+
+```xml
+
+
+ system
+
asynchronous_insert_log
+ 7500
+ toYYYYMM(event_date)
+ 1048576
+ 8192
+ 524288
+
+
+
+```
+
+## crash_log {#server_configuration_parameters-crash_log}
+
+Настройки для таблицы [crash_log](../../operations/system-tables/crash-log.md).
+
+Параметры:
+
+- `database` — имя базы данных;
+- `table` — имя таблицы;
+- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать, если используется `engine`.
+- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если используется `partition_by`.
+- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+- `max_size_rows` – максимальный размер буфера с логами в строках. Когда буфер полностью заполняется, логи сбрасываются на диск.
+Значение по умолчанию: 1024.
+- `reserved_size_rows` – предварительно выделенный размер буфера с логами в строках.
+Значение по умолчанию: 1024.
+- `buffer_size_bytes_flush_threshold` – количество строк в логе, при достижении которого логи начнут сбрасываться на диск в неблокирующем режиме.
+Значение по умолчанию: `max_size / 2`.
+- `flush_on_crash` – определяет, должны ли логи быть сброшены на диск в случае аварийной остановки программы.
+Значение по умолчанию: true.
+
+**Пример**
+
+``` xml
+
+ system
+
crash_log
+ toYYYYMM(event_date)
+ 7500
+ 1024
+ 1024
+ 512
+ true
+
+```
+
## query_masking_rules {#query-masking-rules}
Правила, основанные на регулярных выражениях, которые будут применены для всех запросов, а также для всех сообщений перед сохранением их в лог на сервере,
diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md
index de9bb969085..fb3b18bd46a 100644
--- a/docs/ru/operations/settings/query-complexity.md
+++ b/docs/ru/operations/settings/query-complexity.md
@@ -311,6 +311,52 @@ FORMAT Null;
**Подробности**
-При вставке данных, ClickHouse вычисляет количество партиций во вставленном блоке. Если число партиций больше, чем `max_partitions_per_insert_block`, ClickHouse генерирует исключение со следующим текстом:
+При вставке данных ClickHouse проверяет количество партиций во вставляемом блоке. Если количество партиций превышает `max_partitions_per_insert_block`, то, в зависимости от значения `throw_on_max_partitions_per_insert_block`, ClickHouse либо записывает предупреждение в лог, либо выбрасывает исключение со следующим текстом:
-> «Too many partitions for single INSERT block (more than» + toString(max_parts) + «). The limit is controlled by ‘max_partitions_per_insert_block’ setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).»
+> “Too many partitions for a single INSERT block (`partitions_count` partitions, limit is ” + toString(max_partitions) + “). The limit is controlled by the ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).”
+
+## throw_on_max_partitions_per_insert_block {#settings-throw_on_max_partition_per_insert_block}
+
+Позволяет управлять поведением при достижении лимита `max_partitions_per_insert_block` во вставляемом блоке.
+
+- `true` — при достижении `max_partitions_per_insert_block` выбрасывается исключение.
+- `false` — при достижении `max_partitions_per_insert_block` в лог записывается предупреждение.
+
+Значение по умолчанию: `true`.
+
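+Примерный сценарий (набросок; таблица `tbl` гипотетическая), показывающий, как при вставке, создающей слишком много партиций, получить предупреждение вместо исключения:
+
+```sql
+-- Каждая строка попадает в отдельную партицию.
+CREATE TABLE tbl (x UInt32) ENGINE = MergeTree PARTITION BY x ORDER BY x;
+
+SET max_partitions_per_insert_block = 5;
+SET throw_on_max_partitions_per_insert_block = 0;
+
+-- 10 партиций при лимите 5: вместо исключения в лог записывается предупреждение.
+INSERT INTO tbl SELECT number FROM numbers(10);
+```
+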
+## max_sessions_for_user {#max-sessions-per-user}
+
+Максимальное количество одновременных сессий на одного аутентифицированного пользователя.
+
+Пример:
+
+``` xml
+
+
+ 1
+
+
+ 2
+
+
+ 0
+
+
+
+
+
+ single_session_profile
+
+
+
+ two_sessions_profile
+
+
+
+ unlimited_sessions_profile
+
+
+```
+
+Значение по умолчанию: 0 (неограниченное количество сессий).
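+
+Посмотреть текущее значение настройки можно через системную таблицу `system.settings` (примерный запрос):
+
+```sql
+SELECT name, value, changed
+FROM system.settings
+WHERE name = 'max_sessions_for_user';
+```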
diff --git a/docs/ru/operations/settings/settings-profiles.md b/docs/ru/operations/settings/settings-profiles.md
index ba2cb9a601f..0d094c637ac 100644
--- a/docs/ru/operations/settings/settings-profiles.md
+++ b/docs/ru/operations/settings/settings-profiles.md
@@ -39,7 +39,7 @@ SET profile = 'web'
8
-
+
1000000000100000000000
@@ -67,6 +67,7 @@ SET profile = 'web'
50100
+ 41
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index f83d05ff710..d3db890ad7a 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -238,39 +238,6 @@ ClickHouse применяет настройку в тех случаях, ко
В случае превышения `input_format_allow_errors_ratio` ClickHouse генерирует исключение.
-## input_format_parquet_import_nested {#input_format_parquet_import_nested}
-
-Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Parquet](../../interfaces/formats.md#data-format-parquet).
-
-Возможные значения:
-
-- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
-- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
-
-Значение по умолчанию: `0`.
-
-## input_format_arrow_import_nested {#input_format_arrow_import_nested}
-
-Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Arrow](../../interfaces/formats.md#data_types-matching-arrow).
-
-Возможные значения:
-
-- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
-- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
-
-Значение по умолчанию: `0`.
-
-## input_format_orc_import_nested {#input_format_orc_import_nested}
-
-Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [ORC](../../interfaces/formats.md#data-format-orc).
-
-Возможные значения:
-
-- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
-- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
-
-Значение по умолчанию: `0`.
-
## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions}
Включает или отключает парсер SQL, если потоковый парсер не может проанализировать данные. Этот параметр используется только для формата [Values](../../interfaces/formats.md#data-format-values) при вставке данных. Дополнительные сведения о парсерах читайте в разделе [Синтаксис](../../sql-reference/syntax.md).
@@ -1686,7 +1653,7 @@ SELECT * FROM table_with_enum_column_for_csv_insert;
## input_format_csv_detect_header {#input_format_csv_detect_header}
Обнаружить заголовок с именами и типами в формате CSV.
-
+
Значение по умолчанию - `true`.
## input_format_csv_skip_first_lines {#input_format_csv_skip_first_lines}
@@ -1727,6 +1694,12 @@ echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --in
" string "
```
+## input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns}
+
+Игнорировать дополнительные столбцы (если файл содержит больше столбцов, чем ожидается) и подставлять значения по умолчанию вместо отсутствующих полей в CSV.
+
+Выключено по умолчанию.
+
## output_format_tsv_crlf_end_of_line {#settings-output-format-tsv-crlf-end-of-line}
Использовать в качестве разделителя строк для TSV формата CRLF (DOC/Windows стиль) вместо LF (Unix стиль).
@@ -4195,6 +4168,7 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi
### Шаблон
Шаблон поддерживает следующие виды плейсхолдеров:
+- `%a` — Полное исходное имя файла (например "sample.csv").
- `%f` — Исходное имя файла без расширения (например "sample").
- `%e` — Оригинальное расширение файла с точкой (например ".csv").
- `%t` — Текущее время (в микросекундах).
@@ -4206,3 +4180,29 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi
- Запрос: `SELECT * FROM file('sample.csv')`
Если чтение и обработка `sample.csv` прошли успешно, файл будет переименован в `processed_sample_1683473210851438.csv`.
+
+## precise_float_parsing {#precise_float_parsing}
+
+Позволяет выбрать алгоритм, используемый при парсинге [Float32/Float64](../../sql-reference/data-types/float.md):
+* Если установлено значение `1`, то используется точный метод. Он более медленный, но всегда возвращает число, наиболее близкое к входному значению.
+* В противном случае используется быстрый метод (поведение по умолчанию). Обычно результат его работы совпадает с результатом, полученным точным методом, однако в редких случаях он может отличаться на 1 или 2 наименее значимых цифры.
+
+Возможные значения: `0`, `1`.
+
+Значение по умолчанию: `0`.
+
+Пример:
+
+```sql
+SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0;
+
+┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
+│ 1.7090999999999998 │ 15008753.000000002 │
+└─────────────────────┴──────────────────────────┘
+
+SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1;
+
+┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
+│ 1.7091 │ 15008753 │
+└─────────────────────┴──────────────────────────┘
+```
diff --git a/docs/ru/operations/system-tables/index.md b/docs/ru/operations/system-tables/index.md
index 7ff368b1910..24f79cae212 100644
--- a/docs/ru/operations/system-tables/index.md
+++ b/docs/ru/operations/system-tables/index.md
@@ -45,6 +45,10 @@ sidebar_label: "Системные таблицы"
ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024
-->
7500
+ 1048576
+ 8192
+ 524288
+ false
```
diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md
index e8d4a3ee9fd..80d844a1713 100644
--- a/docs/ru/sql-reference/data-types/datetime.md
+++ b/docs/ru/sql-reference/data-types/datetime.md
@@ -122,6 +122,7 @@ FROM dt
- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format)
- [Настройка `date_time_output_format`](../../operations/settings/index.md)
- [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
+- [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone)
- [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime)
- [Тип данных `Date`](date.md)
- [Тип данных `DateTime64`](datetime64.md)
diff --git a/docs/ru/sql-reference/data-types/datetime64.md b/docs/ru/sql-reference/data-types/datetime64.md
index da2f81f4828..78ad43e4764 100644
--- a/docs/ru/sql-reference/data-types/datetime64.md
+++ b/docs/ru/sql-reference/data-types/datetime64.md
@@ -102,6 +102,7 @@ FROM dt;
- [Настройка `date_time_input_format`](../../operations/settings/settings.md#settings-date_time_input_format)
- [Настройка `date_time_output_format`](../../operations/settings/settings.md)
- [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
+- [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone)
- [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime)
- [Тип данных `Date`](date.md)
- [Тип данных `DateTime`](datetime.md)
diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md
index c43323d68fd..439eddfd752 100644
--- a/docs/ru/sql-reference/functions/array-functions.md
+++ b/docs/ru/sql-reference/functions/array-functions.md
@@ -145,6 +145,8 @@ range([start, ] end [, step])
- Если в результате запроса создаются массивы суммарной длиной больше, чем количество элементов, указанное настройкой [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block), то генерируется исключение.
+- Возвращает Null, если любой из аргументов имеет тип Nullable(Nothing). Если любой из аргументов имеет значение Null (тип Nullable(T)), генерируется исключение.
+
**Примеры**
Запрос:
diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md
index 17ab04b7799..4db8a1ec6f8 100644
--- a/docs/ru/sql-reference/functions/date-time-functions.md
+++ b/docs/ru/sql-reference/functions/date-time-functions.md
@@ -599,29 +599,33 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we
## toYearWeek(date[,mode]) {#toyearweek}
Возвращает год и неделю для даты. Год в результате может отличаться от года в аргументе даты для первой и последней недели года.
-Аргумент mode работает точно так же, как аргумент mode [toWeek()](#toweek). Если mode не задан, используется режим 0.
+Аргумент `mode` работает так же, как аргумент `mode` функции [toWeek()](#toweek); значение `mode` по умолчанию — `0`.
-`toISOYear() ` эквивалентно `intDiv(toYearWeek(date,3),100)`.
+`toISOYear()` эквивалентно `intDiv(toYearWeek(date,3),100)`.
+
+:::warning
+Есть отличие в работе функций `toWeek()` и `toYearWeek()`: `toWeek()` возвращает номер недели в контексте заданного года, и если `toWeek()` возвращает `0`, то `toYearWeek()` вернёт значение, соответствующее последней неделе предыдущего года (см. `prev_yearWeek` в примере).
+:::
**Пример**
Запрос:
```sql
-SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9;
+SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9, toYearWeek(toDate('2022-01-01')) AS prev_yearWeek;
```
Результат:
```text
-┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┐
-│ 2016-12-27 │ 201652 │ 201652 │ 201701 │
-└────────────┴───────────┴───────────┴───────────┘
+┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┬─prev_yearWeek─┐
+│ 2016-12-27 │ 201652 │ 201652 │ 201701 │ 202152 │
+└────────────┴───────────┴───────────┴───────────┴───────────────┘
```
## age
-Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду.
+Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 микросекунду.
Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`.
**Синтаксис**
@@ -635,6 +639,8 @@ age('unit', startdate, enddate, [timezone])
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
Возможные значения:
+ - `microsecond` (возможные сокращения: `us`, `u`)
+ - `millisecond` (возможные сокращения: `ms`)
- `second` (возможные сокращения: `ss`, `s`)
- `minute` (возможные сокращения: `mi`, `n`)
- `hour` (возможные сокращения: `hh`, `h`)
@@ -708,6 +714,8 @@ date_diff('unit', startdate, enddate, [timezone])
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
Возможные значения:
+ - `microsecond` (возможные сокращения: `us`, `u`)
+ - `millisecond` (возможные сокращения: `ms`)
- `second` (возможные сокращения: `ss`, `s`)
- `minute` (возможные сокращения: `mi`, `n`)
- `hour` (возможные сокращения: `hh`, `h`)
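+
+Например, разница в новых единицах `millisecond` и `microsecond` (набросок; значения дат выбраны произвольно, ожидаемые результаты указаны в комментариях):
+
+```sql
+SELECT
+    date_diff('millisecond',
+              toDateTime64('2023-07-06 12:00:00.000', 3),
+              toDateTime64('2023-07-06 12:00:00.250', 3)) AS diff_ms,  -- 250
+    date_diff('microsecond',
+              toDateTime64('2023-07-06 12:00:00.000000', 6),
+              toDateTime64('2023-07-06 12:00:00.000500', 6)) AS diff_us; -- 500
+```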
diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md
index 9638e25d488..276dfc2ef20 100644
--- a/docs/ru/sql-reference/functions/string-functions.md
+++ b/docs/ru/sql-reference/functions/string-functions.md
@@ -1113,3 +1113,50 @@ A text with tags .
The content within CDATA
Do Nothing for 2 Minutes 2:00
```
+
+## initcap {#initcap}
+
+Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами.
+
+## initcapUTF8 {#initcapUTF8}
+
+Как [initcap](#initcap), предполагая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8.
+Не учитывает язык, поэтому, например, для турецкого языка результат может быть не совсем верным.
+Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным.
+Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено.
+
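+Примерный запрос для обеих функций (ожидаемые результаты указаны в комментариях):
+
+```sql
+SELECT
+    initcap('building BRIDGES with clickhouse') AS s1, -- 'Building Bridges With Clickhouse'
+    initcapUTF8('привет, МИР') AS s2;                  -- 'Привет, Мир'
+```
+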
+## firstLine
+
+Возвращает первую строку в многострочном тексте.
+
+**Синтаксис**
+
+```sql
+firstLine(val)
+```
+
+**Аргументы**
+
+- `val` - текст для обработки. [String](../data-types/string.md)
+
+**Возвращаемое значение**
+
+- Первая строка текста или весь текст, если переносы строк отсутствуют.
+
+Тип: [String](../data-types/string.md)
+
+**Пример**
+
+Запрос:
+
+```sql
+select firstLine('foo\nbar\nbaz');
+```
+
+Результат:
+
+```result
+┌─firstLine('foo\nbar\nbaz')─┐
+│ foo │
+└────────────────────────────┘
+```
diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md
index ea4f90d4f66..6e3830869cd 100644
--- a/docs/ru/sql-reference/functions/string-search-functions.md
+++ b/docs/ru/sql-reference/functions/string-search-functions.md
@@ -801,3 +801,55 @@ SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв');
│ 3 │
└────────────────────────────────────────────────────────────┘
```
+
+## hasSubsequence(haystack, needle) {#hasSubsequence}
+
+Возвращает 1, если `needle` является подпоследовательностью `haystack`, иначе 0.
+
+
+**Синтаксис**
+
+``` sql
+hasSubsequence(haystack, needle)
+```
+
+**Аргументы**
+
+- `haystack` — строка, по которой выполняется поиск. [Строка](../syntax.md#syntax-string-literal).
+- `needle` — подпоследовательность, которую необходимо найти. [Строка](../syntax.md#syntax-string-literal).
+
+**Возвращаемые значения**
+
+- 1, если подпоследовательность найдена.
+- 0, если подпоследовательность не найдена.
+
+Тип: `UInt8`.
+
+**Примеры**
+
+Запрос:
+
+``` sql
+SELECT hasSubsequence('garbage', 'arg') ;
+```
+
+Результат:
+
+``` text
+┌─hasSubsequence('garbage', 'arg')─┐
+│ 1 │
+└──────────────────────────────────┘
+```
+
+
+## hasSubsequenceCaseInsensitive
+
+Такая же, как и [hasSubsequence](#hasSubsequence), но работает без учета регистра.
+
+## hasSubsequenceUTF8
+
+Такая же, как и [hasSubsequence](#hasSubsequence), при допущении, что `haystack` и `needle` содержат набор кодовых точек, представляющий текст в кодировке UTF-8.
+
+## hasSubsequenceCaseInsensitiveUTF8
+
+Такая же, как и [hasSubsequenceUTF8](#hasSubsequenceUTF8), но работает без учета регистра.
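+
+Примерные запросы для вариантов без учёта регистра (ожидаемые результаты указаны в комментариях):
+
+```sql
+SELECT hasSubsequenceCaseInsensitive('garbage', 'ARG');    -- 1
+SELECT hasSubsequenceCaseInsensitiveUTF8('абвГДЕ', 'где'); -- 1
+```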
diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md
index d5e6246fe9e..e53104d8d71 100644
--- a/docs/ru/sql-reference/functions/type-conversion-functions.md
+++ b/docs/ru/sql-reference/functions/type-conversion-functions.md
@@ -284,7 +284,13 @@ toDateTime(expr[, time_zone ])
- `expr` — Значение для преобразования. [String](/docs/ru/sql-reference/data-types/string.md), [Int](/docs/ru/sql-reference/data-types/int-uint.md), [Date](/docs/ru/sql-reference/data-types/date.md) или [DateTime](/docs/ru/sql-reference/data-types/datetime.md).
- `time_zone` — Часовой пояс. [String](/docs/ru/sql-reference/data-types/string.md).
-Если `expr` является числом, оно интерпретируется как количество секунд от начала unix эпохи.
+:::note
+Если `expr` является числом, то оно интерпретируется как число секунд с начала Unix-эпохи (Unix Timestamp).
+
+Если же `expr` — [строка (String)](/docs/ru/sql-reference/data-types/string.md), то значение может быть интерпретировано и как Unix Timestamp, и как строковое представление даты / даты со временем.
+Ввиду неоднозначности парсинг строк длиной 4 символа и меньше запрещён. Например, строка `'1999'` может означать как год (неполное строковое представление даты или даты со временем), так и Unix Timestamp.
+Строки длиной 5 символов и более не содержат такой неоднозначности, поэтому их парсинг разрешён.
+:::
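+
+Например (иллюстрация к примечанию выше; ожидаемое поведение указано в комментариях):
+
+```sql
+SELECT toDateTime('2023-07-06 12:00:00'); -- строковое представление даты со временем
+SELECT toDateTime(1688645000);            -- число секунд с начала Unix-эпохи
+-- SELECT toDateTime('1999');             -- ошибка: строка короче 5 символов, разбор неоднозначен
+```
+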
**Возвращаемое значение**
diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md
index a8ace213075..92be30b101a 100644
--- a/docs/ru/sql-reference/statements/alter/column.md
+++ b/docs/ru/sql-reference/statements/alter/column.md
@@ -182,7 +182,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
Синтаксис:
```sql
-ALTER TABLE table_name MODIFY column_name REMOVE property;
+ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Пример**
diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md
index d3846aac289..1a60dc0716c 100644
--- a/docs/ru/sql-reference/statements/create/view.md
+++ b/docs/ru/sql-reference/statements/create/view.md
@@ -73,7 +73,7 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view).
:::
```sql
-CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
+CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
```
`LIVE VIEW` хранит результат запроса [SELECT](../../../sql-reference/statements/select/index.md), указанного при создании, и обновляется сразу же при изменении этого результата. Конечный результат запроса и промежуточные данные, из которых формируется результат, хранятся в оперативной памяти, и это обеспечивает высокую скорость обработки для повторяющихся запросов. LIVE-представления могут отправлять push-уведомления при изменении результата исходного запроса `SELECT`. Для этого используйте запрос [WATCH](../../../sql-reference/statements/watch.md).
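+
+Например, live-представление с периодическим обновлением (набросок; имя `lv` произвольное, предварительно нужно включить настройку `allow_experimental_live_view`):
+
+```sql
+SET allow_experimental_live_view = 1;
+
+CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
+```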
diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md
index 4fa6ac4ce66..747e36b8809 100644
--- a/docs/ru/sql-reference/statements/insert-into.md
+++ b/docs/ru/sql-reference/statements/insert-into.md
@@ -11,7 +11,7 @@ sidebar_label: INSERT INTO
**Синтаксис**
``` sql
-INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
+INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
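+
+Например, с необязательным ключевым словом `TABLE` и явным списком столбцов (таблица `t` гипотетическая):
+
+```sql
+INSERT INTO TABLE t (c1, c2, c3) VALUES (1, 'a', 3);
+```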
@@ -100,7 +100,7 @@ INSERT INTO t FORMAT TabSeparated
**Синтаксис**
``` sql
-INSERT INTO [db.]table [(c1, c2, c3)] SELECT ...
+INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ...
```
Соответствие столбцов определяется их позицией в секции SELECT. При этом, их имена в выражении SELECT и в таблице для INSERT, могут отличаться. При необходимости выполняется приведение типов данных, эквивалентное соответствующему оператору CAST.
@@ -120,7 +120,7 @@ INSERT INTO [db.]table [(c1, c2, c3)] SELECT ...
**Синтаксис**
``` sql
-INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name
+INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name
```
Используйте этот синтаксис, чтобы вставить данные из файла, который хранится на стороне **клиента**. `file_name` и `type` задаются в виде строковых литералов. [Формат](../../interfaces/formats.md) входного файла должен быть задан в секции `FORMAT`.
diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md
index 0983c51d954..83ef115aacd 100644
--- a/docs/ru/sql-reference/table-functions/file.md
+++ b/docs/ru/sql-reference/table-functions/file.md
@@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
- `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
- `?` — заменяет ровно один любой символ.
-- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
+- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`.
- `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
Конструкция с `{}` аналогична табличной функции [remote](remote.md).
diff --git a/docs/zh/engines/table-engines/special/buffer.md b/docs/zh/engines/table-engines/special/buffer.md
index bb95ecdc583..f92a819f3c3 100644
--- a/docs/zh/engines/table-engines/special/buffer.md
+++ b/docs/zh/engines/table-engines/special/buffer.md
@@ -5,7 +5,7 @@ slug: /zh/engines/table-engines/special/buffer
缓冲数据写入 RAM 中,周期性地将数据刷新到另一个表。在读取操作时,同时从缓冲区和另一个表读取数据。
- Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
+ Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]])
引擎的参数:database,table - 要刷新数据的表。可以使用返回字符串的常量表达式而不是数据库名称。 num_layers - 并行层数。在物理上,该表将表示为 num_layers 个独立缓冲区。建议值为16。min_time,max_time,min_rows,max_rows,min_bytes,max_bytes - 从缓冲区刷新数据的条件。
diff --git a/docs/zh/getting-started/example-datasets/wikistat.md b/docs/zh/getting-started/example-datasets/wikistat.md
deleted file mode 100644
index 4ce13b0f1d3..00000000000
--- a/docs/zh/getting-started/example-datasets/wikistat.md
+++ /dev/null
@@ -1,32 +0,0 @@
----
-slug: /zh/getting-started/example-datasets/wikistat
-sidebar_position: 17
-sidebar_label: WikiStat
----
-
-# WikiStat {#wikistat}
-
-参考: http://dumps.wikimedia.org/other/pagecounts-raw/
-
-创建表结构:
-
-``` sql
-CREATE TABLE wikistat
-(
- date Date,
- time DateTime,
- project String,
- subproject String,
- path String,
- hits UInt64,
- size UInt64
-) ENGINE = MergeTree(date, (path, time), 8192);
-```
-
-加载数据:
-
-``` bash
-$ for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txt
-$ cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; done
-$ ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done
-```
diff --git a/docs/zh/getting-started/example-datasets/wikistat.md b/docs/zh/getting-started/example-datasets/wikistat.md
new file mode 120000
index 00000000000..2d429d00984
--- /dev/null
+++ b/docs/zh/getting-started/example-datasets/wikistat.md
@@ -0,0 +1 @@
+../../../en/getting-started/example-datasets/wikistat.md
\ No newline at end of file
diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md
index eedc913cf82..3f42f3f8da4 100644
--- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md
+++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md
@@ -346,9 +346,7 @@ UserID.bin,URL.bin,和EventTime.bin是UserID
- 我们将主键列(UserID, URL)中的一些列值标记为橙色。
- 这些橙色标记的列值是每个颗粒中每个主键列的最小值。这里的例外是最后一个颗粒(上图中的颗粒1082),最后一个颗粒我们标记的是最大的值。
-
- 正如我们将在下面看到的,这些橙色标记的列值将是表主索引中的条目。
+ 这些橙色标记的列值是每个颗粒中第一行的主键列值。正如我们将在下面看到的,这些橙色标记的列值将是表主索引中的条目。
- 我们从0开始对行进行编号,以便与ClickHouse内部行编号方案对齐,该方案也用于记录消息。
:::
@@ -1071,13 +1069,6 @@ ClickHouse服务器日志文件中相应的跟踪日志确认了ClickHouse正在
## 通过projections使用联合主键索引
-Projections目前是一个实验性的功能,因此我们需要告诉ClickHouse:
-
-```sql
-SET optimize_use_projections = 1;
-```
-
-
在原表上创建projection:
```sql
ALTER TABLE hits_UserID_URL
@@ -1096,10 +1087,12 @@ ALTER TABLE hits_UserID_URL
:::note
- 该projection正在创建一个隐藏表,该表的行顺序和主索引基于该projection的给定order BY子句
-- 我们使用MATERIALIZE关键字,以便立即用源表hits_UserID_URL的所有887万行导入隐藏表
+- `SHOW TABLES` 语句查询是不会列出这个隐藏表的
+- 我们使用`MATERIALIZE`关键字,以便立即用源表hits_UserID_URL的所有887万行导入隐藏表
- 如果在源表hits_UserID_URL中插入了新行,那么这些行也会自动插入到隐藏表中
- 查询总是(从语法上)针对源表hits_UserID_URL,但是如果隐藏表的行顺序和主索引允许更有效地执行查询,那么将使用该隐藏表
-- 实际上,隐式创建的隐藏表的行顺序和主索引与我们显式创建的辅助表相同:
+- 请注意,投影(projections)不会使 `ORDER BY` 查询语句的效率更高,即使 `ORDER BY` 匹配上了 projection 的 `ORDER BY` 语句(请参阅:https://github.com/ClickHouse/ClickHouse/issues/47333)
+- 实际上,隐式创建的隐藏表的行顺序和主索引与我们显式创建的辅助表相同:
@@ -1163,7 +1156,7 @@ ClickHouse服务器日志文件中跟踪日志确认了ClickHouse正在对索引
```
-## 移除无效的主键列
+## 小结
带有联合主键(UserID, URL)的表的主索引对于加快UserID的查询过滤非常有用。但是,尽管URL列是联合主键的一部分,但该索引在加速URL查询过滤方面并没有提供显著的帮助。
@@ -1176,4 +1169,12 @@ ClickHouse服务器日志文件中跟踪日志确认了ClickHouse正在对索引
但是,如果复合主键中的键列在基数上有很大的差异,那么查询按基数升序对主键列进行排序是有益的。
-主键键列之间的基数差越大,主键键列的顺序越重要。我们将在以后的文章中对此进行演示。请继续关注。
+主键键列之间的基数差得越大,主键中的列的顺序越重要。我们将在下一章节对此进行演示。
+
+# 高效地为键列排序
+
+TODO
+
+# 高效地识别单行
+
+TODO
diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md
index c7a0f355a92..f84768beccc 100644
--- a/docs/zh/interfaces/http.md
+++ b/docs/zh/interfaces/http.md
@@ -53,7 +53,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
-X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
1
```
@@ -262,9 +262,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
您可以在`X-ClickHouse-Progress`响应头中收到查询进度的信息。为此,启用[Http Header携带进度](../operations/settings/settings.md#settings-send_progress_in_http_headers)。示例:
``` text
-X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
+X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
+X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
+X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
```
显示字段信息:
@@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@@ -521,7 +521,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@@ -561,7 +561,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
%
@@ -613,7 +613,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Absolute Path File
* Connection #0 to host localhost left intact
@@ -632,7 +632,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Relative Path File
* Connection #0 to host localhost left intact
diff --git a/docs/zh/operations/optimizing-performance/profile-guided-optimization.md b/docs/zh/operations/optimizing-performance/profile-guided-optimization.md
new file mode 120000
index 00000000000..31cb656bd99
--- /dev/null
+++ b/docs/zh/operations/optimizing-performance/profile-guided-optimization.md
@@ -0,0 +1 @@
+../../../en/operations/optimizing-performance/profile-guided-optimization.md
\ No newline at end of file
diff --git a/docs/zh/operations/server-configuration-parameters/settings.md b/docs/zh/operations/server-configuration-parameters/settings.md
index f6106d8734e..8e2cb389f04 100644
--- a/docs/zh/operations/server-configuration-parameters/settings.md
+++ b/docs/zh/operations/server-configuration-parameters/settings.md
@@ -455,7 +455,7 @@ SSL客户端/服务器配置。
- verificationMode – The method for checking the node’s certificates. Details are in the description of the [A.背景](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) 同学们 可能的值: `none`, `relaxed`, `strict`, `once`.
- verificationDepth – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value.
- loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `true`, `false`. \|
-- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`.
+- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH`.
- cacheSessions – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. 可接受的值: `true`, `false`.
- sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. 始终建议使用此参数,因为如果服务器缓存会话,以及客户端请求缓存,它有助于避免出现问题。 默认值: `${application.name}`.
- sessionCacheSize – The maximum number of sessions that the server caches. Default value: 1024\*20. 0 – Unlimited sessions.
diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md
index 53dadc23c6d..e4b70322477 100644
--- a/docs/zh/sql-reference/functions/date-time-functions.md
+++ b/docs/zh/sql-reference/functions/date-time-functions.md
@@ -643,6 +643,8 @@ date_diff('unit', startdate, enddate, [timezone])
- `unit` — `value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。
可能的值:
+ - `microsecond`
+ - `millisecond`
- `second`
- `minute`
- `hour`
diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md
index 8ce2d20a10c..bce0994ecd2 100644
--- a/docs/zh/sql-reference/statements/create/view.md
+++ b/docs/zh/sql-reference/statements/create/view.md
@@ -72,7 +72,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中
使用[allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view)设置启用实时视图和`WATCH`查询的使用。 输入命令`set allow_experimental_live_view = 1`。
```sql
-CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
+CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
```
实时视图存储相应[SELECT](../../../sql-reference/statements/select/index.md)查询的结果,并在查询结果更改时随时更新。 查询结果以及与新数据结合所需的部分结果存储在内存中,为重复查询提供更高的性能。当使用[WATCH](../../../sql-reference/statements/watch.md)查询更改查询结果时,实时视图可以提供推送通知。
diff --git a/docs/zh/sql-reference/statements/insert-into.md b/docs/zh/sql-reference/statements/insert-into.md
index 9acc1655f9a..f80c0a8a8ea 100644
--- a/docs/zh/sql-reference/statements/insert-into.md
+++ b/docs/zh/sql-reference/statements/insert-into.md
@@ -8,7 +8,7 @@ INSERT INTO 语句主要用于向系统中添加数据.
查询的基本格式:
``` sql
-INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
+INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
您可以在查询中指定要插入的列的列表,如:`[(c1, c2, c3)]`。您还可以使用列[匹配器](../../sql-reference/statements/select/index.md#asterisk)的表达式,例如`*`和/或[修饰符](../../sql-reference/statements/select/index.md#select-modifiers),例如 [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier)。
@@ -71,7 +71,7 @@ INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set
例如,下面的查询所使用的输入格式就与上面INSERT … VALUES的中使用的输入格式相同:
``` sql
-INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ...
+INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ...
```
ClickHouse会清除数据前所有的空白字符与一个换行符(如果有换行符的话)。所以在进行查询时,我们建议您将数据放入到输入输出格式名称后的新的一行中去(如果数据是以空白字符开始的,这将非常重要)。
@@ -93,7 +93,7 @@ INSERT INTO t FORMAT TabSeparated
### 使用`SELECT`的结果写入 {#inserting-the-results-of-select}
``` sql
-INSERT INTO [db.]table [(c1, c2, c3)] SELECT ...
+INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ...
```
写入与SELECT的列的对应关系是使用位置来进行对应的,尽管它们在SELECT表达式与INSERT中的名称可能是不同的。如果需要,会对它们执行对应的类型转换。
diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service
index 7742d8b278a..42dc5bd380d 100644
--- a/packages/clickhouse-server.service
+++ b/packages/clickhouse-server.service
@@ -29,6 +29,7 @@ EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
+AmbientCapabilities=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
[Install]
# ClickHouse should not start from the rescue shell (rescue.target).
diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml
index 018e88ef828..66299fddd4a 100644
--- a/packages/clickhouse-server.yaml
+++ b/packages/clickhouse-server.yaml
@@ -55,6 +55,9 @@ contents:
- src: clickhouse
dst: /usr/bin/clickhouse-keeper
type: symlink
+- src: clickhouse
+ dst: /usr/bin/clickhouse-keeper-client
+ type: symlink
- src: root/usr/bin/clickhouse-report
dst: /usr/bin/clickhouse-report
- src: root/usr/bin/clickhouse-server
diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp
index 466a0c194f7..c5acd10f791 100644
--- a/programs/benchmark/Benchmark.cpp
+++ b/programs/benchmark/Benchmark.cpp
@@ -1,8 +1,6 @@
#include
#include
-#include
#include
-#include
#include
#include
#include
@@ -18,9 +16,7 @@
#include
#include
#include
-#include
#include
-#include
#include
#include
#include
@@ -38,8 +34,6 @@
#include
-namespace fs = std::filesystem;
-
/** A tool for evaluating ClickHouse performance.
* The tool emulates a case with fixed amount of simultaneously executing queries.
*/
@@ -79,7 +73,6 @@ public:
bool randomize_,
size_t max_iterations_,
double max_time_,
- const String & json_path_,
size_t confidence_,
const String & query_id_,
const String & query_to_execute_,
@@ -98,7 +91,6 @@ public:
cumulative(cumulative_),
max_iterations(max_iterations_),
max_time(max_time_),
- json_path(json_path_),
confidence(confidence_),
query_id(query_id_),
query_to_execute(query_to_execute_),
@@ -165,9 +157,6 @@ public:
int main(const std::vector<std::string> &) override
{
- if (!json_path.empty() && fs::exists(json_path)) /// Clear file with previous results
- fs::remove(json_path);
-
readQueries();
runBenchmark();
return 0;
@@ -197,7 +186,6 @@ private:
bool cumulative;
size_t max_iterations;
double max_time;
- String json_path;
size_t confidence;
String query_id;
String query_to_execute;
@@ -226,26 +214,23 @@ private:
size_t read_bytes = 0;
size_t result_rows = 0;
size_t result_bytes = 0;
- double work_time = 0;
using Sampler = ReservoirSampler<double>;
Sampler sampler {1 << 16};
- void add(double seconds, size_t read_rows_inc, size_t read_bytes_inc, size_t result_rows_inc, size_t result_bytes_inc)
+ void add(double duration, size_t read_rows_inc, size_t read_bytes_inc, size_t result_rows_inc, size_t result_bytes_inc)
{
++queries;
- work_time += seconds;
read_rows += read_rows_inc;
read_bytes += read_bytes_inc;
result_rows += result_rows_inc;
result_bytes += result_bytes_inc;
- sampler.insert(seconds);
+ sampler.insert(duration);
}
void clear()
{
queries = 0;
- work_time = 0;
read_rows = 0;
read_bytes = 0;
result_rows = 0;
@@ -331,10 +316,13 @@ private:
return false;
}
- if (delay > 0 && delay_watch.elapsedSeconds() > delay)
+ double seconds = delay_watch.elapsedSeconds();
+ if (delay > 0 && seconds > delay)
{
printNumberOfQueriesExecuted(queries_executed);
- cumulative ? report(comparison_info_total) : report(comparison_info_per_interval);
+ cumulative
+ ? report(comparison_info_total, total_watch.elapsedSeconds())
+ : report(comparison_info_per_interval, seconds);
delay_watch.restart();
}
}
@@ -350,16 +338,7 @@ private:
try
{
for (size_t i = 0; i < concurrency; ++i)
- {
- EntryPtrs connection_entries;
- connection_entries.reserve(connections.size());
-
- for (const auto & connection : connections)
- connection_entries.emplace_back(std::make_shared(
- connection->get(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings))));
-
- pool.scheduleOrThrowOnError([this, connection_entries]() mutable { thread(connection_entries); });
- }
+ pool.scheduleOrThrowOnError([this]() mutable { thread(); });
}
catch (...)
{
@@ -389,21 +368,18 @@ private:
pool.wait();
total_watch.stop();
- if (!json_path.empty())
- reportJSON(comparison_info_total, json_path);
-
printNumberOfQueriesExecuted(queries_executed);
- report(comparison_info_total);
+ report(comparison_info_total, total_watch.elapsedSeconds());
}
- void thread(EntryPtrs & connection_entries)
+ void thread()
{
Query query;
/// Randomly choosing connection index
pcg64 generator(randomSeed());
- std::uniform_int_distribution<size_t> distribution(0, connection_entries.size() - 1);
+ std::uniform_int_distribution<size_t> distribution(0, connections.size() - 1);
/// In these threads we do not accept INT signal.
sigset_t sig_set;
@@ -423,15 +399,13 @@ private:
extracted = queue.tryPop(query, 100);
if (shutdown || (max_iterations && queries_executed == max_iterations))
- {
return;
- }
}
const auto connection_index = distribution(generator);
try
{
- execute(connection_entries, query, connection_index);
+ execute(query, connection_index);
consecutive_errors = 0;
}
catch (...)
@@ -460,17 +434,18 @@ private:
}
}
- void execute(EntryPtrs & connection_entries, Query & query, size_t connection_index)
+ void execute(Query & query, size_t connection_index)
{
Stopwatch watch;
- Connection & connection = **connection_entries[connection_index];
+ ConnectionPool::Entry entry = connections[connection_index]->get(
+ ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings));
if (reconnect)
- connection.disconnect();
+ entry->disconnect();
RemoteQueryExecutor executor(
- connection, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage);
+ *entry, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage);
if (!query_id.empty())
executor.setQueryId(query_id);
@@ -485,19 +460,19 @@ private:
executor.finish();
- double seconds = (display_client_side_time || progress.elapsed_ns == 0)
+ double duration = (display_client_side_time || progress.elapsed_ns == 0)
? watch.elapsedSeconds()
: progress.elapsed_ns / 1e9;
std::lock_guard lock(mutex);
size_t info_index = round_robin ? 0 : connection_index;
- comparison_info_per_interval[info_index]->add(seconds, progress.read_rows, progress.read_bytes, info.rows, info.bytes);
- comparison_info_total[info_index]->add(seconds, progress.read_rows, progress.read_bytes, info.rows, info.bytes);
- t_test.add(info_index, seconds);
+ comparison_info_per_interval[info_index]->add(duration, progress.read_rows, progress.read_bytes, info.rows, info.bytes);
+ comparison_info_total[info_index]->add(duration, progress.read_rows, progress.read_bytes, info.rows, info.bytes);
+ t_test.add(info_index, duration);
}
- void report(MultiStats & infos)
+ void report(MultiStats & infos, double seconds)
{
std::lock_guard lock(mutex);
@@ -510,8 +485,6 @@ private:
if (0 == info->queries)
return;
- double seconds = info->work_time / concurrency;
-
std::string connection_description = connections[i]->getDescription();
if (round_robin)
{
@@ -525,10 +498,10 @@ private:
}
std::cerr
<< connection_description << ", "
- << "queries " << info->queries << ", ";
+ << "queries: " << info->queries << ", ";
if (info->errors)
{
- std::cerr << "errors " << info->errors << ", ";
+ std::cerr << "errors: " << info->errors << ", ";
}
std::cerr
<< "QPS: " << (info->queries / seconds) << ", "
@@ -567,62 +540,6 @@ private:
}
}
- void reportJSON(MultiStats & infos, const std::string & filename)
- {
- WriteBufferFromFile json_out(filename);
-
- std::lock_guard lock(mutex);
-
- auto print_key_value = [&](auto key, auto value, bool with_comma = true)
- {
- json_out << double_quote << key << ": " << value << (with_comma ? ",\n" : "\n");
- };
-
- auto print_percentile = [&json_out](Stats & info, auto percent, bool with_comma = true)
- {
- json_out << "\"" << percent << "\": " << info.sampler.quantileNearest(percent / 100.0) << (with_comma ? ",\n" : "\n");
- };
-
- json_out << "{\n";
-
- for (size_t i = 0; i < infos.size(); ++i)
- {
- const auto & info = infos[i];
-
- json_out << double_quote << connections[i]->getDescription() << ": {\n";
- json_out << double_quote << "statistics" << ": {\n";
-
- double seconds = info->work_time / concurrency;
-
- print_key_value("QPS", info->queries.load() / seconds);
- print_key_value("RPS", info->read_rows / seconds);
- print_key_value("MiBPS", info->read_bytes / seconds / 1048576);
- print_key_value("RPS_result", info->result_rows / seconds);
- print_key_value("MiBPS_result", info->result_bytes / seconds / 1048576);
- print_key_value("num_queries", info->queries.load());
- print_key_value("num_errors", info->errors, false);
-
- json_out << "},\n";
- json_out << double_quote << "query_time_percentiles" << ": {\n";
-
- if (info->queries != 0)
- {
- for (int percent = 0; percent <= 90; percent += 10)
- print_percentile(*info, percent);
-
- print_percentile(*info, 95);
- print_percentile(*info, 99);
- print_percentile(*info, 99.9);
- print_percentile(*info, 99.99, false);
- }
-
- json_out << "}\n";
- json_out << (i == infos.size() - 1 ? "}\n" : "},\n");
- }
-
- json_out << "}\n";
- }
-
public:
~Benchmark() override
@@ -675,7 +592,6 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
("iterations,i", value()->default_value(0), "amount of queries to be executed")
("timelimit,t", value()->default_value(0.), "stop launch of queries after specified time limit")
("randomize,r", "randomize order of execution")
- ("json", value()->default_value(""), "write final report to specified file in JSON format")
("host,h", value()->multitoken(), "list of hosts")
("port", value()->multitoken(), "list of ports")
("roundrobin", "Instead of comparing queries for different --host/--port just pick one random --host/--port for every query and send query to it.")
@@ -739,7 +655,6 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
options.count("randomize"),
options["iterations"].as(),
options["timelimit"].as(),
- options["json"].as(),
options["confidence"].as(),
options["query_id"].as(),
options["query"].as(),
diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 19b601b9a7b..e73f77819ad 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -812,6 +812,11 @@ bool Client::processWithFuzzing(const String & full_query)
}
catch (...)
{
+ if (!ast_to_process)
+ fmt::print(stderr,
+ "Error while forming new query: {}\n",
+ getCurrentExceptionMessage(true));
+
// Some functions (e.g. protocol parsers) don't throw, but
// set last_exception instead, so we'll also do it here for
// uniformity.
@@ -1173,12 +1178,12 @@ void Client::processOptions(const OptionsDescription & options_description,
{
String traceparent = options["opentelemetry-traceparent"].as();
String error;
- if (!global_context->getClientInfo().client_trace_context.parseTraceparentHeader(traceparent, error))
+ if (!global_context->getClientTraceContext().parseTraceparentHeader(traceparent, error))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse OpenTelemetry traceparent '{}': {}", traceparent, error);
}
if (options.count("opentelemetry-tracestate"))
- global_context->getClientInfo().client_trace_context.tracestate = options["opentelemetry-tracestate"].as();
+ global_context->getClientTraceContext().tracestate = options["opentelemetry-tracestate"].as();
}
@@ -1238,10 +1243,9 @@ void Client::processConfig()
global_context->getSettingsRef().max_insert_block_size);
}
- ClientInfo & client_info = global_context->getClientInfo();
- client_info.setInitialQuery();
- client_info.quota_key = config().getString("quota_key", "");
- client_info.query_kind = query_kind;
+ global_context->setQueryKindInitial();
+ global_context->setQuotaClientKey(config().getString("quota_key", ""));
+ global_context->setQueryKind(query_kind);
}
@@ -1404,10 +1408,9 @@ void Client::readArguments(
else if (arg == "--password" && ((arg_num + 1) >= argc || std::string_view(argv[arg_num + 1]).starts_with('-')))
{
common_arguments.emplace_back(arg);
- /// No password was provided by user. Add '\n' as implicit password,
- /// which encodes that client should ask user for the password.
- /// '\n' is used because there is hardly a chance that a user would use '\n' as a password.
- common_arguments.emplace_back("\n");
+ /// If the value of --password is omitted, the user will be asked for the password
+ /// before the connection is established.
+ common_arguments.emplace_back(ConnectionParameters::ASK_PASSWORD);
}
else
common_arguments.emplace_back(arg);
diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp
index efe7121cace..556eca808f6 100644
--- a/programs/copier/ClusterCopier.cpp
+++ b/programs/copier/ClusterCopier.cpp
@@ -608,6 +608,8 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
ss << "ALTER TABLE " << getQuotedTable(original_table) << ((partition_name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") << partition_name;
UInt64 num_shards_drop_partition = executeQueryOnCluster(task_table.cluster_push, ss.str(), task_cluster->settings_push, ClusterExecutionMode::ON_EACH_SHARD);
+ if (num_shards_drop_partition != task_table.cluster_push->getShardCount())
+ return TaskStatus::Error;
LOG_INFO(log, "Drop partition {} in original table {} have been executed successfully on {} shards of {}",
partition_name, getQuotedTable(original_table), num_shards_drop_partition, task_table.cluster_push->getShardCount());
diff --git a/programs/disks/ICommand.cpp b/programs/disks/ICommand.cpp
index 52d1a2196a9..86188fb6db1 100644
--- a/programs/disks/ICommand.cpp
+++ b/programs/disks/ICommand.cpp
@@ -1,4 +1,6 @@
#include "ICommand.h"
+#include
+
namespace DB
{
diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp
index 43c66a32302..d7d61bbcd3b 100644
--- a/programs/format/Format.cpp
+++ b/programs/format/Format.cpp
@@ -163,13 +163,15 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
{
ASTPtr res = parseQueryAndMovePosition(
parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth);
- /// For insert query with data(INSERT INTO ... VALUES ...), will lead to format fail,
- /// should throw exception early and make exception message more readable.
+
+ /// An INSERT query with inline data (INSERT INTO ... VALUES ...) cannot be formatted without losing that data,
+ /// so throw an exception early and keep the exception message readable.
if (const auto * insert_query = res->as(); insert_query && insert_query->data)
{
throw Exception(DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA,
"Can't format ASTInsertQuery with data, since data will be lost");
}
+
if (!quiet)
{
if (!backslash)
diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp
index d83e189f7ef..e10a9fea86b 100644
--- a/programs/install/Install.cpp
+++ b/programs/install/Install.cpp
@@ -20,10 +20,7 @@
#include
#include
#include
-#include
-#include
#include
-#include
#include
#include
#include
@@ -35,6 +32,14 @@
#include
+#include
+
+#include "config.h"
+
+/// Embedded configuration files used inside the install program
+INCBIN(resource_config_xml, SOURCE_DIR "/programs/server/config.xml");
+INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml");
+
/** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary.
* It also allows to avoid dependency on systemd, upstart, SysV init.
@@ -560,7 +565,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (!fs::exists(main_config_file))
{
- std::string_view main_config_content = getResource("config.xml");
+ std::string_view main_config_content(reinterpret_cast<const char *>(gresource_config_xmlData), gresource_config_xmlSize);
if (main_config_content.empty())
{
fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string());
@@ -672,7 +677,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (!fs::exists(users_config_file))
{
- std::string_view users_config_content = getResource("users.xml");
+ std::string_view users_config_content(reinterpret_cast<const char *>(gresource_users_xmlData), gresource_users_xmlSize);
if (users_config_content.empty())
{
fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string());
@@ -992,7 +997,9 @@ namespace
{
/// sudo respects limits in /etc/security/limits.conf e.g. open files,
/// that's why we are using it instead of the 'clickhouse su' tool.
- command = fmt::format("sudo -u '{}' {}", user, command);
+ /// By default, sudo resets all environment variables, but we need to preserve
+ /// the values read from /etc/default/clickhouse by the /etc/init.d/clickhouse script.
+ command = fmt::format("sudo --preserve-env -u '{}' {}", user, command);
}
fmt::print("Will run {}\n", command);
diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp
index 05928a0d20b..c822a631798 100644
--- a/programs/keeper-client/Commands.cpp
+++ b/programs/keeper-client/Commands.cpp
@@ -1,5 +1,6 @@
#include "Commands.h"
+#include
#include "KeeperClient.h"
@@ -24,8 +25,18 @@ void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
else
path = client->cwd;
- for (const auto & child : client->zookeeper->getChildren(path))
- std::cout << child << " ";
+ auto children = client->zookeeper->getChildren(path);
+ std::sort(children.begin(), children.end());
+
+ bool need_space = false;
+ for (const auto & child : children)
+ {
+ if (std::exchange(need_space, true))
+ std::cout << " ";
+
+ std::cout << child;
+ }
+
std::cout << "\n";
}
@@ -77,7 +88,7 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
client->zookeeper->set(
client->getAbsolutePath(query->args[0].safeGet()),
query->args[1].safeGet(),
- static_cast(query->args[2].safeGet()));
+ static_cast(query->args[2].get()));
}
bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const
@@ -115,6 +126,21 @@ void CreateCommand::execute(const ASTKeeperQuery * query, KeeperClient * client)
static_cast(query->args[2].safeGet()));
}
+bool TouchCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const
+{
+ String arg;
+ if (!parseKeeperPath(pos, expected, arg))
+ return false;
+ node->args.push_back(std::move(arg));
+
+ return true;
+}
+
+void TouchCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
+{
+ client->zookeeper->createIfNotExists(client->getAbsolutePath(query->args[0].safeGet()), "");
+}
+
bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const
{
String arg;
@@ -130,6 +156,173 @@ void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet())) << "\n";
}
+bool GetStatCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const
+{
+ String arg;
+ if (!parseKeeperPath(pos, expected, arg))
+ return true;
+
+ node->args.push_back(std::move(arg));
+ return true;
+}
+
+void GetStatCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
+{
+ Coordination::Stat stat;
+ String path;
+ if (!query->args.empty())
+ path = client->getAbsolutePath(query->args[0].safeGet